shithub: riscv

--- a/sys/man/4/ext2srv

+++ /dev/null

@@ -1,110 +1,0 @@

-.TH EXT2SRV 4

-.SH NAME

-ext2srv \- ext2 file system

-.SH SYNOPSIS

-.B ext2srv

-[

-.B -vrs

-] [

-.B -f

-.I file

-] [

-.B -p

-.I passwd

-] [

-.B -g

-.I group

-] [

-.I service

-]

-.SH DESCRIPTION

-.I Ext2srv

-is a file server that interprets the Linux Second Extended File System.

-A single instance of

-.I ext2srv

-can provide access to multiple ext2 partitions simultaneously.

-.PP

-.I Ext2srv

-posts a file descriptor named

-.I service

-(default

-.BR ext2 )

-in the

-.B /srv

-directory.

-To access an ext2 file system on a device, use

-.B mount

-with the

-.I spec

-argument

-(see

-.IR bind (1))

-the name of the file holding the raw ext2 file system, typically the disk or partition.

-If

-.I spec

-is undefined in the

-.BR mount ,

-.I ext2srv

-will use

-.I file

-as the default name for the device holding the file system.

-.PP

-Normally

-.I ext2srv

-creates a pipe to act as the communications channel between

-itself and its clients.

-The

-.B -s

-flag instructs

-.I ext2srv

-to use its standard input and output instead.

-This flag also prevents the creation of an explicit service file in

-.BR /srv .

-.PP

-The

-.B -v

-flag causes verbose output for debugging, while

-the

-.B -r

-flag (recommended) makes the file system read-only.

-The optional

-.B -p

-and

-.B -g

-flags specify Unix-format password (respectively group) files

-that give the mapping between the numeric user- and group-ID

-numbers in the ext2 file system and the strings reported by Plan 9 status

-inquiries.

-.PP

-There is no authentication or permission checking.

-Anyone who can access the ext2 file system will have full access

-to all its files, including write access if

-.I ext2srv

-is not started with the

-.B -r

-flag, irrespective of file ownership and permission flags.

-.PP

-Some file system state is cached in memory, and may

-be flushed only when the file system is unmounted.

-Therefore if

-.I ext2srv

-is stopped or the machine is rebooted while an ext2 file system

-is still mounted,

-the superblock on the device will have been marked `not valid'

-(unless the

-.B -r

-flag was used),

-and a

-.I fsck

-will be required before that file system may be mounted again.

-.SH BUGS

-There is no authentication or permission checking.

-The implementation has not tracked any changes to the ext2

-specification since it was written.

-There may be other bugs.

-It is advisable to use

-.I ext2srv

-in read-only mode whenever possible.

-.SH AUTHOR

-Bodet Laurent ([email protected]),

-with later updates by Russ Cox and Richard Miller.

--- /dev/null

+++ b/sys/man/4/ext4srv

@@ -1,0 +1,142 @@

+.TH EXT4SRV 4

+.SH NAME

+ext4srv \- ext4 file system

+.SH SYNOPSIS

+.B ext4srv

+[

+.B -Clrs

+] [

+.B -g

+.I groupfile

+] [

+.B -R

+.I uid

+] [

+.I service

+]

+.PP

+.B ext4srv

+.B -M

+.I (2|3|4)

+[

+.B -L

+.I label

+] [

+.B -b

+.I blksize

+] [

+.B -N

+.I numinodes

+] [

+.B -I

+.I inodesize

+]

+.I device

+.SH DESCRIPTION

+.I Ext4srv

+is a file server that interprets the Linux Second, Third and Fourth

+Extended File Systems.

+A single instance of

+.I ext4srv

+can provide access to multiple ext2, ext3 and ext4 partitions

+simultaneously.

+.PP

+.I Ext4srv

+posts a file descriptor named

+.I service

+(default

+.BR ext4 )

+in the

+.B /srv

+directory.

+To access an ext4 file system on a device, use

+.B mount

+with the

+.I spec

+argument

+(see

+.IR bind (1))

+the name of the file holding the raw ext4 file system, typically the disk or partition.

+If

+.I spec

+is undefined in the

+.BR mount ,

+.I ext4srv

+will use

+.I file

+as the default name for the device holding the file system.

+.PP

+Normally

+.I ext4srv

+creates a pipe to act as the communications channel between

+itself and its clients.

+The

+.B -s

+flag instructs

+.I ext4srv

+to use its standard input and output instead.

+This flag also prevents the creation of an explicit service file in

+.BR /srv .

+.PP

+The

+.B -r

+flag (recommended) makes the file system read-only.

+The optional

+.B -g

+flag specifies a Unix-format group file that gives the mapping between the

+numeric user- and group-ID numbers in the ext4 file system and the

+strings reported by Plan 9 status inquiries.

+.PP

+With the

+.B -R

+option, the file system can be mounted in "root" mode, allowing full access regardless

+of permissions. The usual

+.I uid

+in this case is

+.IR 0 .

+.PP

+The optional flag

+.B -l

+enables symlink resolution; otherwise symlinks are hidden

+entirely by default, as Plan 9 does not have that concept.

+.PP

+Some file system state is cached in memory, and may

+be flushed only when the file system is unmounted if the

+.B -C

+flag is used, which enables the write-back cache.

+Therefore if

+.I ext4srv

+is stopped or the machine is rebooted while an ext4 file system is

+still mounted, the superblock on the device will have been marked `not

+valid'

+(unless the

+.B -r

+flag was used).

+.SH MKFS

+A different mode of

+.I ext4srv

+is enabled with the

+.B -M

+option, which accepts the file system version

+.RI ( 2

+for

+.I ext2

+and so on).

+In this mode a file system is initialized on the specified

+.I device

+and all existing data on it is destroyed.

+.PP

+Additional options may be specified, for example

+.B -L

+may be used to set the filesystem label.

+.SH BUGS

+Yes.

+.PP

+Permission checking is very basic and may not be complete.

+There may be many bugs.

+It is advisable to use

+.I ext4srv

+in read-only mode whenever possible.

+.SH HISTORY

+.I Ext4srv

+first appeared in 9front (February, 2024).

--- a/sys/src/cmd/aux/multi/mkfile

+++ b/sys/src/cmd/aux/multi/mkfile

@@ -33,7 +33,7 @@

 	dossrv\

 	echo\

 	ed\

-	ext2srv\

+	ext4srv\

 #	fcp\

 	grep\

 	hget\

--- a/sys/src/cmd/ext2srv/chat.c

+++ /dev/null

@@ -1,53 +1,0 @@

-#include <u.h>

-#include <libc.h>

-#include <fcall.h>

-#include <thread.h>

-#include <9p.h>

-#include "dat.h"

-#include "fns.h"

-#define	SIZE	1024

-#define	DOTDOT	(&fmt+1)

-int	chatty;

-void

-chat(char *fmt, ...)

-{

-	char buf[SIZE], *out;

-	va_list arg;

-	if (!chatty)

-		return;

-	va_start(arg, fmt);

-	out = vseprint(buf, buf+sizeof(buf), fmt, arg);

-	va_end(arg);

-	write(2, buf, (long)(out-buf));

-}

-void

-mchat(char *fmt, ...)

-{

-	char buf[SIZE], *out;

-	va_list arg;

-	va_start(arg, fmt);

-	out = vseprint(buf, buf+sizeof(buf), fmt, arg);

-	va_end(arg);

-	write(2, buf, (long)(out-buf));

-}

-void

-panic(char *fmt, ...)

-{

-	char buf[SIZE];

-	va_list arg;

-	int n;

-	n = sprint(buf, "%s %d: panic ", argv0, getpid());

-	va_start(arg, fmt);

-	vseprint(buf+n, buf+sizeof(buf)-n, fmt, arg);

-	va_end(arg);

-	fprint(2, "%s: %r\n", buf);

-	exits("panic");

-}

--- a/sys/src/cmd/ext2srv/dat.h

+++ /dev/null

@@ -1,222 +1,0 @@

-typedef struct Xfs	Xfs;

-typedef struct Xfile	Xfile;

-typedef struct Iobuf	Iobuf;

-typedef struct Ext2 Ext2;

-typedef struct SuperBlock SuperBlock;

-typedef struct GroupDesc GroupDesc;

-typedef struct Inode Inode;

-typedef struct DirEntry DirEntry;

-#define SECTORSIZE	512

-#define OFFSET_SUPER_BLOCK	1024

-#define EXT2_SUPER_MAGIC	0xEF53

-#define EXT2_MIN_BLOCK_SIZE  1024

-#define EXT2_MAX_BLOCK_SIZE  4096

-#define EXT2_ROOT_INODE	2

-#define EXT2_FIRST_INO		11

-#define EXT2_VALID_FS	0x0001

-#define EXT2_ERROR_FS	0x0002

-/*

- * Structure of the super block

- */

-struct SuperBlock {

-	uint	s_inodes_count;		/* Inodes count */

-	uint	s_blocks_count;		/* Blocks count */

-	uint	s_r_blocks_count;	/* Reserved blocks count */

-	uint	s_free_blocks_count;	/* Free blocks count */

-	uint	s_free_inodes_count;	/* Free inodes count */

-	uint	s_first_data_block;	/* First Data Block */

-	uint	s_log_block_size;	/* Block size */

-	int	s_log_frag_size;	/* Fragment size */

-	uint	s_blocks_per_group;	/* # Blocks per group */

-	uint	s_frags_per_group;	/* # Fragments per group */

-	uint	s_inodes_per_group;	/* # Inodes per group */

-	uint	s_mtime;		/* Mount time */

-	uint	s_wtime;		/* Write time */

-	ushort	s_mnt_count;		/* Mount count */

-	short	s_max_mnt_count;	/* Maximal mount count */

-	ushort	s_magic;		/* Magic signature */

-	ushort	s_state;		/* File system state */

-	ushort	s_errors;		/* Behaviour when detecting errors */

-	ushort	s_pad;

-	uint	s_lastcheck;		/* time of last check */

-	uint	s_checkinterval;	/* max. time between checks */

-	uint	s_creator_os;		/* OS */

-	uint	s_rev_level;		/* Revision level */

-	ushort	s_def_resuid;		/* Default uid for reserved blocks */

-	ushort	s_def_resgid;		/* Default gid for reserved blocks */

-	uint	s_reserved[235];	/* Padding to the end of the block */

-};

-/*

- * Structure of a blocks group descriptor

- */

-struct GroupDesc

-{

-	uint	bg_block_bitmap;		/* Blocks bitmap block */

-	uint	bg_inode_bitmap;		/* Inodes bitmap block */

-	uint	bg_inode_table;		/* Inodes table block */

-	ushort	bg_free_blocks_count;	/* Free blocks count */

-	ushort	bg_free_inodes_count;	/* Free inodes count */

-	ushort	bg_used_dirs_count;	/* Directories count */

-	ushort	bg_pad;

-	uint	bg_reserved[3];

-};

-/*

- * Constants relative to the data blocks

- */

-#define	EXT2_NDIR_BLOCKS		12

-#define	EXT2_IND_BLOCK			EXT2_NDIR_BLOCKS

-#define	EXT2_DIND_BLOCK			(EXT2_IND_BLOCK + 1)

-#define	EXT2_TIND_BLOCK			(EXT2_DIND_BLOCK + 1)

-#define	EXT2_N_BLOCKS			(EXT2_TIND_BLOCK + 1)

-/*

- * Structure of an inode on the disk

- */

-struct Inode {

-	ushort i_mode;		/* File mode */

-	ushort i_uid;		/* Owner Uid */

-	uint  i_size;		/* Size in bytes */

-	uint  i_atime;		/* Access time */

-	uint i_ctime;		/* Creation time */

-	uint  i_mtime;		/* Modification time */

-	uint  i_dtime;		/* Deletion Time */

-	ushort i_gid;		/* Group Id */

-	ushort i_links_count;	/* Links count */

-	uint  i_blocks;	/* Blocks count */

-	uint  i_flags;		/* File flags */

-	uint osd1;

-	uint	i_block[EXT2_N_BLOCKS];/* Pointers to blocks */

-	uint	i_version;	/* File version (for NFS) */

-	uint	i_file_acl;	/* File ACL */

-	uint	i_dir_acl;	/* Directory ACL */

-	uint	i_faddr;		/* Fragment address */

-	uchar osd2[12];

-};

-/*

- * Structure of a directory entry

- */

-#define EXT2_NAME_LEN 255

-#define DIR_REC_LEN(name_len)	(((name_len) + 8 + 3) & ~3)

-struct DirEntry {

-	uint	inode;			/* Inode number */

-	ushort	rec_len;		/* Directory entry length */

-	uchar	name_len;		/* Name length */

-	uchar	reserved;

-	char	name[EXT2_NAME_LEN];	/* File name */

-};

-#define S_IFMT  00170000

-#define S_IFLNK	 0120000

-#define S_IFREG  0100000

-#define S_IFDIR  0040000

-#define S_ISLNK(m)	(((m) & S_IFMT) == S_IFLNK)

-#define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)

-#define S_ISDIR(m)	(((m) & S_IFMT) == S_IFDIR)

-#define DEFAULT_UID	200

-#define DEFAULT_GID	100

-struct Iobuf

-{

-	Xfs *dev;

-	long	addr;

-	Iobuf *next;

-	Iobuf *prev;

-	Iobuf *hash;

-	int busy;

-	int dirty;

-	char *iobuf;

-};

-struct Xfs{

-	Xfs *next;

-	char *name;		/* of file containing external f.s. */

-	Qid	qid;		/* of file containing external f.s. */

-	long	ref;		/* attach count */

-	Qid	rootqid;	/* of plan9 constructed root directory */

-	short	dev;

-	short	fmt;

-	void *ptr;

-	/* data from super block */

-	int block_size;

-	int desc_per_block;

-	int inodes_per_group;

-	int inodes_per_block;

-	int addr_per_block;

-	int blocks_per_group;

-	int ngroups;

-	int superaddr, superoff;

-	int grpaddr;

-};

-struct Xfile{

-	Xfile *next;		/* in hash bucket */

-	long	client;

-	long	fid;

-	Xfs *	xf;

-	void *	ptr;

-	uint inbr;		/* inode nbr */

-	uint pinbr;	/* parrent inode */

-	ulong bufaddr;	/* addr of inode block */

-	ulong bufoffset;

-	int root;		/* true on attach for ref count */

-	int dirindex;	/* next dir entry to read */

-};

-#define EXT2_SUPER		1

-#define EXT2_DESC		2

-#define EXT2_BBLOCK	3

-#define EXT2_BINODE	4

-struct Ext2{

-	char type;

-	union{

-		SuperBlock *sb;

-		GroupDesc *gd;

-		char *bmp;

-	}u;

-	Iobuf *buf;

-};

-#define DESC_ADDR(xf,n)		( (xf)->grpaddr + ((n)/(xf)->desc_per_block) )

-#define DESC_OFFSET(xf,d,n)	( ((GroupDesc *)(d)) + ((n)%(xf)->desc_per_block) )

-enum{

-	Asis, Clean, Clunk

-};

-enum{

-	Enevermind,

-	Eformat,

-	Eio,

-	Enomem,

-	Enonexist,

-	Eexist,

-	Eperm,

-	Enofilsys,

-	Eauth,

-	Enospace,

-	Elink,

-	Elongname,

-	Eintern,

-	Ecorrupt,

-	Enotclean

-};

-extern int	chatty;

-extern int	errno;

-extern char	*deffile;

-extern int rdonly;

--- a/sys/src/cmd/ext2srv/errstr.h

+++ /dev/null

@@ -1,17 +1,0 @@

-char *errmsg[] = {

-	[Enevermind]	"never mind",

-	[Eformat]	"unknown format",

-	[Eio]		"I/O error",

-	[Enomem]	"server out of memory",

-	[Enonexist]	"file does not exist",

-	[Eexist]	"file already exist",

-	[Eperm]		"permission denied",

-	[Enofilsys]	"no file system device specified",

-	[Eauth]		"authentication failed",

-	[Enospace]	"no space on device",

-	[Elink]	"write is only allowed in regular files",

-	[Elongname]	"name is too long",

-	[Eintern]	"internal Ext2 error",

-	[Ecorrupt]	"corrupt filesystem",

-	[Enotclean] "fs not clean ... running e2fsck is recommended"

-};

--- a/sys/src/cmd/ext2srv/ext2fs.c

+++ /dev/null

@@ -1,348 +1,0 @@

-#include <u.h>

-#include <libc.h>

-#include <fcall.h>

-#include <thread.h>

-#include <9p.h>

-#include "dat.h"

-#include "fns.h"

-#define thdr	r->ifcall

-#define rhdr	r->ofcall

-extern int	errno;

-static void

-response(Req *r)

-{

-	char *err;

-	if (errno) {

-		err = xerrstr(errno);

-		chat("%s\n", err);

-		respond(r, err);

-	} else {

-		chat("OK\n");

-		respond(r, nil);

-	}

-}

-static void

-rattach(Req *r)

-{

-	Xfs *xf;

-	Xfile *root;

-	chat("attach(fid=%d,uname=\"%s\",aname=\"%s\",afid=\"%d\")...",

-		thdr.fid, thdr.uname, thdr.aname, thdr.afid);

-	errno = 0;

-	root = xfile(r->fid, Clean);

-	if(!root){

-		errno = Enomem;

-		goto error;

-	}

-	root->xf = xf = getxfs(thdr.aname);

-	if(!xf)

-		goto error;

-	/* now attach root inode */

-	if( get_inode(root, EXT2_ROOT_INODE) < 0 )

-		goto error;

-	r->fid->qid.type = QTDIR;

-	r->fid->qid.vers = 0;

-	root->xf->rootqid = r->fid->qid;

-	root->pinbr = EXT2_ROOT_INODE;

-	root->root = 1;

-	rhdr.qid = r->fid->qid;

-error:

-	response(r);

-}

-static char *

-rclone(Fid *fid, Fid *newfid)

-{

-	Xfile *of = xfile(fid, Asis);

-	Xfile *nf = xfile(newfid, Clean);

-	chat("clone(fid=%d,newfid=%d)...", fid->fid, newfid->fid);

-	errno = 0;

-	if(!of)

-		errno = Eio;

-	else if(!nf)

-		errno = Enomem;

-	else{

-		Xfile *next = nf->next;

-		*nf = *of;

-		nf->next = next;

-		nf->fid = newfid->fid;

-		nf->root = 0;

-	}

-	chat("%s\n", errno? xerrstr(errno) : "OK");

-	return errno ? xerrstr(errno) : 0;

-}

-static char *

-rwalk1(Fid *fid, char *name, Qid *qid)

-{

-	Xfile *f=xfile(fid, Asis);

-	int nr, sinbr = 0;

-	chat("walk1(fid=%d,name=\"%s\")...", fid->fid, name);

-	errno = 0;

-	if( !f ){

-		chat("no xfile...");

-		goto error;

-	}

-	if( !(fid->qid.type & QTDIR) ){

-		chat("qid.type=0x%x...", fid->qid.type);

-		goto error;

-	}

-	sinbr = f->pinbr;

-	if( name == 0 || name[0] == 0 || !strcmp(name, ".") ){

-		*qid = fid->qid;

-		goto ok;

-	}else if( !strcmp(name, "..") ){

-		if( fid->qid.path == f->xf->rootqid.path ){

-			chat("walkup from root...");

-			*qid = fid->qid;

-			goto ok;

-		}

-		if( get_inode(f, f->pinbr) < 0 )

-			goto error;

-		if( f->pinbr == EXT2_ROOT_INODE ){

-			*qid = f->xf->rootqid;

-			f->pinbr = EXT2_ROOT_INODE;

-		} else {

-			*qid = (Qid){f->pinbr,0,QTDIR};

-			f->inbr = f->pinbr;

-			if( (nr = get_file(f, "..")) < 0 )

-				goto error;

-			f->pinbr = nr;

-		}

-	}else{

-		f->pinbr = f->inbr;

-		if( (nr = get_file(f, name)) < 0 )

-			goto error;

-		if( get_inode(f, nr) < 0 )

-			goto error;

-		*qid = (Qid){nr,0,0};

-		if( nr == EXT2_ROOT_INODE )

-			*qid = f->xf->rootqid;

-		else if( S_ISDIR(getmode(f)) )

-			 qid->type = QTDIR;

-		/*strcpy(f->name, thdr.name);*/

-	}

-ok:

-	chat("OK\n");

-	return 0;

-error:

-	f->pinbr = sinbr;

-	chat("%s\n", xerrstr(Enonexist));

-	return xerrstr(Enonexist);

-}

-static void

-rstat(Req *r)

-{

-	Xfile *f=xfile(r->fid, Asis);

-	chat("stat(fid=%d)...", thdr.fid);

-	errno = 0;

-	if( !f )

-		errno = Eio;

-	else{

-		dostat(r->fid->qid, f, &r->d);

-	}

-	response(r);

-}

-static void

-rwstat(Req *r)

-{

-	Xfile *f=xfile(r->fid, Asis);

-	chat("wstat(fid=%d)...", thdr.fid);

-	errno = 0;

-	if( !f )

-		errno = Eio;

-	else

-		dowstat(f, &r->d);

-	response(r);

-}

-static void

-rread(Req *r)

-{

-	Xfile *f;

-	int nr;

-	chat("read(fid=%d,offset=%lld,count=%d)...",

-		thdr.fid, thdr.offset, thdr.count);

-	errno = 0;

-	if ( !(f=xfile(r->fid, Asis)) )

-		goto error;

-	if( r->fid->qid.type & QTDIR ){

-		nr = readdir(f, r->rbuf, thdr.offset, thdr.count);

-	}else

-		nr = readfile(f, r->rbuf, thdr.offset, thdr.count);

-	if(nr >= 0){

-		rhdr.count = nr;

-		chat("rcnt=%d...OK\n", nr);

-		respond(r, nil);

-		return;

-	}

-error:

-	errno = Eio;

-	response(r);

-}

-static void

-rwrite(Req *r)

-{

-	Xfile *f; int nr;

-	chat("write(fid=%d,offset=%lld,count=%d)...",

-		thdr.fid, thdr.offset, thdr.count);

-	errno = 0;

-	if (!(f=xfile(r->fid, Asis)) ){

-		errno = Eio;

-		goto error;

-	}

-	if( !S_ISREG(getmode(f)) ){

-		errno = Elink;

-		goto error;

-	}

-	nr = writefile(f, thdr.data, thdr.offset, thdr.count);

-	if(nr >= 0){

-		rhdr.count = nr;

-		chat("rcnt=%d...OK\n", nr);

-		respond(r, nil);

-		return;

-	}

-	errno = Eio;

-error:

-	response(r);

-}

-static void

-destroyfid(Fid *fid)

-{

-	chat("destroy(fid=%d)\n", fid->fid);

-	xfile(fid, Clunk);

-	/*syncbuf(xf);*/

-}

-static void

-ropen(Req *r)

-{

-	Xfile *f;

-	chat("open(fid=%d,mode=%d)...", thdr.fid, thdr.mode);

-	errno = 0;

-	f = xfile(r->fid, Asis);

-	if( !f ){

-		errno = Eio;

-		goto error;

-	}

-	if(thdr.mode & OTRUNC){

-		if( !S_ISREG(getmode(f)) ){

-			errno = Eperm;

-			goto error;

-		}

-		if(truncfile(f) < 0){

-			goto error;

-		}

-	}

-	chat("f->qid=0x%8.8lux...", r->fid->qid.path);

-	rhdr.qid = r->fid->qid;

-error:

-	response(r);

-}

-static void

-rcreate(Req *r)

-{

-	Xfile *f;

-	int inr, perm;

-	chat("create(fid=%d,name=\"%s\",perm=%uo,mode=%d)...",

-		thdr.fid, thdr.name, thdr.perm, thdr.mode);

-	errno = 0;

-	if(strcmp(thdr.name, ".") == 0 || strcmp(thdr.name, "..") == 0){

-		errno = Eperm;

-		goto error;

-	}

-	f = xfile(r->fid, Asis);

-	if( !f ){

-		errno = Eio;

-		goto error;

-	}

-	if( strlen(thdr.name) > EXT2_NAME_LEN ){

-		chat("name too long ...");

-		errno = Elongname;

-		goto error;

-	}

-	/* create */

-	errno = 0;

-	if( thdr.perm & DMDIR ){

-		perm = (thdr.perm & ~0777) |

-				(getmode(f) & thdr.perm & 0777);

-		perm |= S_IFDIR;

-		inr = create_dir(f, thdr.name, perm);

-	}else{

-		perm = (thdr.perm & (~0777|0111)) |

-				(getmode(f) & thdr.perm & 0666);

-		perm |= S_IFREG;

-		inr = create_file(f, thdr.name, perm);

-	}

-	if( inr < 0 )

-		goto error;

-	/* fill with new inode */

-	f->pinbr = f->inbr;

-	if( get_inode(f, inr) < 0 ){

-		errno = Eio;

-		goto error;

-	}

-	r->fid->qid = (Qid){inr, 0, 0};

-	if( S_ISDIR(getmode(f)) )

-		r->fid->qid.type |= QTDIR;

-	chat("f->qid=0x%8.8lux...", r->fid->qid.path);

-	rhdr.qid = r->fid->qid;

-error:

-	response(r);

-}

-static void

-rremove(Req *r)

-{

-	Xfile *f=xfile(r->fid, Asis);

-	chat("remove(fid=%d) ...", thdr.fid);

-	errno = 0;

-	if(!f){

-		errno = Eio;

-		goto error;

-	}

-	/* check permission here !!!!*/

-	unlink(f);

-error:

-	response(r);

-}

-Srv ext2srv = {

-	.destroyfid =	destroyfid,

-	.attach =	rattach,

-	.stat =		rstat,

-	.wstat =	rwstat,

-	.clone =	rclone,

-	.walk1 =	rwalk1,

-	.open =		ropen,

-	.read =		rread,

-	.write =	rwrite,

-	.create =	rcreate,

-	.remove =	rremove,

-};

--- a/sys/src/cmd/ext2srv/ext2srv.man

+++ /dev/null

@@ -1,110 +1,0 @@

-.TH EXT2SRV 4

-.SH NAME

-ext2srv \- ext2 file system

-.SH SYNOPSIS

-.B ext2srv

-[

-.B -vrs

-] [

-.B -f

-.I file

-] [

-.B -p

-.I passwd

-] [

-.B -g

-.I group

-] [

-.I service

-]

-.SH DESCRIPTION

-.I Ext2srv

-is a file server that interprets the Linux Second Extended File System.

-A single instance of

-.I ext2srv

-can provide access to multiple ext2 partitions simultaneously.

-.PP

-.I Ext2srv

-posts a file descriptor named

-.I service

-(default

-.BR ext2 )

-in the

-.B /srv

-directory.

-To access an ext2 file system on a device, use

-.B mount

-with the

-.I spec

-argument

-(see

-.IR bind (1))

-the name of the file holding the raw ext2 file system, typically the disk or partition.

-If

-.I spec

-is undefined in the

-.BR mount ,

-.I ext2srv

-will use

-.I file

-as the default name for the device holding the file system.

-.PP

-Normally

-.I ext2srv

-creates a pipe to act as the communications channel between

-itself and its clients.

-The

-.B -s

-flag instructs

-.I ext2srv

-to use its standard input and output instead.

-This flag also prevents the creation of an explicit service file in

-.BR /srv .

-.PP

-The

-.B -v

-flag causes verbose output for debugging, while

-the

-.B -r

-flag (recommended) makes the file system read-only.

-The optional

-.B -p

-and

-.B -g

-flags specify Unix-format password (respectively group) files

-that give the mapping between the numeric user- and group-ID

-numbers in the ext2 file system and the strings reported by Plan 9 status

-inquiries.

-.PP

-There is no authentication or permission checking.

-Anyone who can access the ext2 file system will have full access

-to all its files, including write access if

-.I ext2srv

-is not started with the

-.B -r

-flag, irrespective of file ownership and permission flags.

-.PP

-Some file system state is cached in memory, and may

-be flushed only when the file system is unmounted.

-Therefore if

-.I ext2srv

-is stopped or the machine is rebooted while an ext2 file system

-is still mounted,

-the superblock on the device will have been marked `not valid'

-(unless the

-.B -r

-flag was used),

-and a

-.I fsck

-will be required before that file system may be mounted again.

-.SH BUGS

-There is no authentication or permission checking.

-The implementation has not tracked any changes to the ext2

-specification since it was written.

-There may be other bugs.

-It is advisable to use

-.I ext2srv

-in read-only mode whenever possible.

-.SH AUTHOR

-Bodet Laurent ([email protected]),

-with later updates by Russ Cox and Richard Miller.

--- a/sys/src/cmd/ext2srv/ext2subs.c

+++ /dev/null

@@ -1,1870 +1,0 @@

-/*

- * ext2subs.c version 0.20

- *

- * Some strategic functions come from linux/fs/ext2

- * kernel sources written by Remy Card.

- *

-*/

-#include <u.h>

-#include <libc.h>

-#include <bio.h>

-#include <fcall.h>

-#include <thread.h>

-#include <9p.h>

-#include "dat.h"

-#include "fns.h"

-#define putext2(e)	putbuf((e).buf)

-#define dirtyext2(e)	dirtybuf((e).buf)

-static Intmap *uidmap, *gidmap;

-static int

-getnum(char *s, int *n)

-{

-	char *r;

-	*n = strtol(s, &r, 10);

-	return (r != s);

-}

-static Intmap*

-idfile(char *f)

-{

-	Biobuf *bin;

-	Intmap *map;

-	char *fields[3];

-	char *s;

-	int nf, id;

-	map = allocmap(0);

-	bin = Bopen(f, OREAD);

-	if (bin == 0)

-		return 0;

-	while ((s = Brdline(bin, '\n')) != 0) {

-		s[Blinelen(bin)-1] = '\0';

-		nf = getfields(s, fields, 3, 0, ":");

-		if (nf == 3 && getnum(fields[2], &id))

-			insertkey(map, id, strdup(fields[0]));

-	}

-	Bterm(bin);

-	return map;

-}

-void

-uidfile(char *f)

-{

-	uidmap = idfile(f);

-}

-void

-gidfile(char *f)

-{

-	gidmap = idfile(f);

-}

-static char*

-mapuid(int id)

-{

-	static char s[12];

-	char *p;

-	if (uidmap && (p = lookupkey(uidmap, id)) != 0)

-		return p;

-	sprint(s, "%d", id);

-	return s;

-}

-static char*

-mapgid(int id)

-{

-	static char s[12];

-	char *p;

-	if (gidmap && (p = lookupkey(gidmap, id)) != 0)

-		return p;

-	sprint(s, "%d", id);

-	return s;

-}

-int

-ext2fs(Xfs *xf)

-{

-	SuperBlock superblock;

-	/* get the super block */

-	seek(xf->dev, OFFSET_SUPER_BLOCK, 0);

-	if( sizeof(SuperBlock) !=

-				read(xf->dev, &superblock, sizeof(SuperBlock)) ){

-		chat("can't read super block %r...", xf->dev);

-		errno = Eformat;

-		return -1;

-	}

-	if( superblock.s_magic != EXT2_SUPER_MAGIC ){

-		chat("Bad super block...");

-		errno = Eformat;

-		return -1;

-	}

-	if( !(superblock.s_state & EXT2_VALID_FS) ){

-		chat("fs not checked...");

-		errno = Enotclean;

-		return -1;

-	}

-	xf->block_size = EXT2_MIN_BLOCK_SIZE << superblock.s_log_block_size;

-	xf->desc_per_block = xf->block_size / sizeof (GroupDesc);

-	xf->inodes_per_group = superblock.s_inodes_per_group;

-	xf->inodes_per_block = xf->block_size / sizeof (Inode);

-	xf->addr_per_block = xf->block_size / sizeof (uint);

-	xf->blocks_per_group = superblock.s_blocks_per_group;

-	if( xf->block_size == OFFSET_SUPER_BLOCK )

-		xf->superaddr = 1, xf->superoff = 0, xf->grpaddr = 2;

-	else if( xf->block_size == 2*OFFSET_SUPER_BLOCK ||

-			xf->block_size == 4*OFFSET_SUPER_BLOCK )

-		xf->superaddr = 0, xf->superoff = OFFSET_SUPER_BLOCK, xf->grpaddr = 1;

-	else {

-		chat(" blocks of %d bytes are not supported...", xf->block_size);

-		errno = Eformat;

-		return -1;

-	}

-	chat("good super block...");

-	xf->ngroups = (superblock.s_blocks_count -

-				superblock.s_first_data_block +

-				superblock.s_blocks_per_group -1) /

-				superblock.s_blocks_per_group;

-	superblock.s_state &= ~EXT2_VALID_FS;

-	superblock.s_mnt_count++;

-	seek(xf->dev, OFFSET_SUPER_BLOCK, 0);

-	if( !rdonly && sizeof(SuperBlock) !=

-				write(xf->dev, &superblock, sizeof(SuperBlock)) ){

-		chat("can't write super block...");

-		errno = Eio;

-		return -1;

-	}

-	return 0;

-}

-Ext2

-getext2(Xfs *xf, char type, int n)

-{

-	Iobuf *bd;

-	Ext2 e;

-	switch(type){

-	case EXT2_SUPER:

-		e.buf = getbuf(xf, xf->superaddr);

-		if( !e.buf ) goto error;

-		e.u.sb = (SuperBlock *)(e.buf->iobuf + xf->superoff);

-		e.type = EXT2_SUPER;

-		break;

-	case EXT2_DESC:

-		e.buf = getbuf(xf, DESC_ADDR(xf, n));

-		if( !e.buf ) goto error;

-		e.u.gd = DESC_OFFSET(xf, e.buf->iobuf, n);

-		e.type = EXT2_DESC;

-		break;

-	case EXT2_BBLOCK:

-		bd = getbuf(xf, DESC_ADDR(xf, n));

-		if( !bd ) goto error;

-		e.buf = getbuf(xf, DESC_OFFSET(xf, bd->iobuf, n)->bg_block_bitmap);

-		if( !e.buf ){

-			putbuf(bd);

-			goto error;

-		}

-		putbuf(bd);

-		e.u.bmp = (char *)e.buf->iobuf;

-		e.type = EXT2_BBLOCK;

-		break;

-	case EXT2_BINODE:

-		bd = getbuf(xf, DESC_ADDR(xf, n));

-		if( !bd ) goto error;

-		e.buf = getbuf(xf, DESC_OFFSET(xf, bd->iobuf, n)->bg_inode_bitmap);

-		if( !e.buf ){

-			putbuf(bd);

-			goto error;

-		}

-		putbuf(bd);

-		e.u.bmp = (char *)e.buf->iobuf;

-		e.type = EXT2_BINODE;

-		break;

-	default:

-		goto error;

-	}

-	return e;

-error:

-	panic("getext2");

-	return e;

-}

-int

-get_inode( Xfile *file, uint nr )

-{

-	unsigned long block_group, block;

-	Xfs *xf = file->xf;

-	Ext2 ed, es;

-	es = getext2(xf, EXT2_SUPER, 0);

-	if(nr > es.u.sb->s_inodes_count ){

-		chat("inode number %d is too big...", nr);

-		putext2(es);

-		errno = Eio;

-		return -1;

-	}

-	putext2(es);

-	block_group = (nr - 1) / xf->inodes_per_group;

-	if( block_group >= xf->ngroups ){

-		chat("block group (%d) > groups count...", block_group);

-		errno = Eio;

-		return -1;

-	}

-	ed = getext2(xf, EXT2_DESC, block_group);

-	block = ed.u.gd->bg_inode_table + (((nr-1) % xf->inodes_per_group) /

-			xf->inodes_per_block);

-	putext2(ed);

-	file->bufoffset = (nr-1) % xf->inodes_per_block;

-	file->inbr = nr;

-	file->bufaddr= block;

-	return 1;

-}

-int

-get_file( Xfile *f, char *name)

-{

-	uint offset, nr, i;

-	Xfs *xf = f->xf;

-	Inode *inode;

-	int nblock;

-	DirEntry *dir;

-	Iobuf *buf, *ibuf;

-	if( !S_ISDIR(getmode(f)) )

-		return -1;

-	ibuf = getbuf(xf, f->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-	nblock = (inode->i_blocks * 512) / xf->block_size;

-	for(i=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){

-		buf = getbuf(xf, inode->i_block[i]);

-		if( !buf ){

-			putbuf(ibuf);

-			return -1;

-		}

-		for(offset=0 ; offset < xf->block_size ;  ){

-			dir = (DirEntry *)(buf->iobuf + offset);

-			if( dir->name_len==strlen(name) &&

-					!strncmp(name, dir->name, dir->name_len) ){

-				nr = dir->inode;

-				putbuf(buf);

-				putbuf(ibuf);

-				return nr;

-			}

-			offset += dir->rec_len;

-		}

-		putbuf(buf);

-	}

-	putbuf(ibuf);

-	errno = Enonexist;

-	return -1;

-}

-char *

-getname(Xfile *f, char *str)

-{

-	Xfile ft;

-	int offset, i, len;

-	Xfs *xf = f->xf;

-	Inode *inode;

-	int nblock;

-	DirEntry *dir;

-	Iobuf *buf, *ibuf;

-	ft = *f;

-	if( get_inode(&ft, f->pinbr) < 0 )

-		return 0;

-	if( !S_ISDIR(getmode(&ft)) )

-		return 0;

-	ibuf = getbuf(xf, ft.bufaddr);

-	if( !ibuf )

-		return 0;

-	inode = ((Inode *)ibuf->iobuf) + ft.bufoffset;

-	nblock = (inode->i_blocks * 512) / xf->block_size;

-	for(i=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){

-		buf = getbuf(xf, inode->i_block[i]);

-		if( !buf ){

-			putbuf(ibuf);

-			return 0;

-		}

-		for(offset=0 ; offset < xf->block_size ;  ){

-			dir = (DirEntry *)(buf->iobuf + offset);

-			if( f->inbr == dir->inode ){

-				len = (dir->name_len < EXT2_NAME_LEN) ? dir->name_len : EXT2_NAME_LEN;

-				if (str == 0)

-					str = malloc(len+1);

-				strncpy(str, dir->name, len);

-				str[len] = 0;

-				putbuf(buf);

-				putbuf(ibuf);

-				return str;

-			}

-			offset += dir->rec_len;

-		}

-		putbuf(buf);

-	}

-	putbuf(ibuf);

-	errno = Enonexist;

-	return 0;

-}

-void

-dostat(Qid qid, Xfile *f, Dir *dir )

-{

-	Inode *inode;

-	Iobuf *ibuf;

-	char *name;

-	memset(dir, 0, sizeof(Dir));

-	if(  f->inbr == EXT2_ROOT_INODE ){

-		dir->name = estrdup9p("/");

-		dir->qid = (Qid){0,0,QTDIR};

-		dir->mode = DMDIR | 0777;

-	}else{

-		ibuf = getbuf(f->xf, f->bufaddr);

-		if( !ibuf )

-			return;

-		inode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-		dir->length = inode->i_size;

-		dir->atime = inode->i_atime;

-		dir->mtime = inode->i_mtime;

-		putbuf(ibuf);

-		name = getname(f, 0);

-		dir->name = name;

-		dir->uid = estrdup9p(mapuid(inode->i_uid));

-		dir->gid = estrdup9p(mapgid(inode->i_gid));

-		dir->qid = qid;

-		dir->mode = getmode(f);

-		if( qid.type & QTDIR )

-			dir->mode |= DMDIR;

-	}

-}

-int

-dowstat(Xfile *f, Dir *stat)

-{

-	Xfs *xf = f->xf;

-	Inode *inode;

-	Xfile fdir;

-	Iobuf *ibuf;

-	char name[EXT2_NAME_LEN+1];

-	/* change name */

-	getname(f, name);

-	if( stat->name && stat->name[0] != 0 && strcmp(name, stat->name) ){

-		/* get dir */

-		fdir = *f;

-		if( get_inode(&fdir, f->pinbr) < 0 ){

-			chat("can't get inode %d...", f->pinbr);

-			return -1;

-		}

-		ibuf = getbuf(xf, fdir.bufaddr);

-		if( !ibuf )

-			return -1;

-		inode = ((Inode *)ibuf->iobuf) +fdir.bufoffset;

-		/* Clean old dir entry */

-		if( delete_entry(xf, inode, f->inbr) < 0 ){

-			chat("delete entry failed...");

-			putbuf(ibuf);

-			return -1;

-		}

-		putbuf(ibuf);

-		/* add the new entry */

-		if( add_entry(&fdir, stat->name, f->inbr) < 0 ){

-			chat("add entry failed...");

-			return -1;

-		}

-	}

-	ibuf = getbuf(xf, f->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-	if (stat->mode != ~0)

-	if( (getmode(f) & 0777) != (stat->mode & 0777) ){

-		inode->i_mode = (getmode(f) & ~0777) | (stat->mode & 0777);

-		dirtybuf(ibuf);

-	}

-	if (stat->mtime != ~0)

-	if(  inode->i_mtime != stat->mtime ){

-		inode->i_mtime = stat->mtime;

-		dirtybuf(ibuf);

-	}

-	putbuf(ibuf);

-	return 1;

-}

-long

-readfile(Xfile *f, void *vbuf, vlong offset, long count)

-{

-	Xfs *xf = f->xf;

-	Inode *inode;

-	Iobuf *buffer, *ibuf;

-	long rcount;

-	int len, o, cur_block, baddr;

-	uchar *buf;

-	buf = vbuf;

-	ibuf = getbuf(xf, f->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-	if( offset >= inode->i_size ){

-		putbuf(ibuf);

-		return 0;

-	}

-	if( offset + count > inode->i_size )

-		count = inode->i_size - offset;

-	/* fast link */

-	if( S_ISLNK(getmode(f)) && (inode->i_size <= EXT2_N_BLOCKS<<2) ){

-		memcpy(&buf[0], ((char *)inode->i_block)+offset, count);

-		putbuf(ibuf);

-		return count;

-	}

-	chat("read block [ ");

-	cur_block = offset / xf->block_size;

-	o = offset % xf->block_size;

-	rcount = 0;

-	while( count > 0 ){

-		baddr = bmap(f, cur_block++);

-		if( !baddr ){

-			putbuf(ibuf);

-			return -1;

-		}

-		buffer = getbuf(xf, baddr);

-		if( !buffer ){

-			putbuf(ibuf);

-			return -1;

-		}

-		chat("%d ", baddr);

-		len = xf->block_size - o;

-		if( len > count )

-			len = count;

-		memcpy(&buf[rcount], &buffer->iobuf[o], len);

-		rcount += len;

-		count -= len;

-		o = 0;

-		putbuf(buffer);

-	}

-	chat("] ...");

-	inode->i_atime = time(0);

-	dirtybuf(ibuf);

-	putbuf(ibuf);

-	return rcount;

-}

-long

-readdir(Xfile *f, void *vbuf, vlong offset, long count)

-{

-	int off, i, len;

-	long rcount;

-	Xfs *xf = f->xf;

-	Inode *inode, *tinode;

-	int nblock;

-	DirEntry *edir;

-	Iobuf *buffer, *ibuf, *tbuf;

-	Dir pdir;

-	Xfile ft;

-	uchar *buf;

-	char name[EXT2_NAME_LEN+1];

-	unsigned int dirlen;

-	int index;

-	buf = vbuf;

-	if (offset == 0)

-		f->dirindex = 0;

-	if( !S_ISDIR(getmode(f)) )

-		return -1;

-	ibuf = getbuf(xf, f->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-	nblock = (inode->i_blocks * 512) / xf->block_size;

-	ft = *f;

-	chat("read block [ ");

-	index = 0;

-	for(i=0, rcount=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){

-		buffer = getbuf(xf, inode->i_block[i]);

-		if( !buffer ){

-			putbuf(ibuf);

-			return -1;

-		}

-		chat("%d, ", buffer->addr);

-		for(off=0 ; off < xf->block_size ;  ){

-			edir = (DirEntry *)(buffer->iobuf + off);

-			off += edir->rec_len;

-			if( (edir->name[0] == '.' ) && (edir->name_len == 1))

-				continue;

-			if(edir->name[0] == '.' && edir->name[1] == '.' &&

-										edir->name_len == 2)

-				continue;

-			if( edir->inode == 0 ) /* for lost+found dir ... */

-				continue;

-			if( index++ < f->dirindex )

-				continue;

-			if( get_inode(&ft, edir->inode) < 0 ){

-				chat("can't find ino no %d ] ...", edir->inode);

-error:			putbuf(buffer);

-				putbuf(ibuf);

-				return -1;

-			}

-			tbuf = getbuf(xf, ft.bufaddr);

-			if( !tbuf )

-				goto error;

-			tinode = ((Inode *)tbuf->iobuf) + ft.bufoffset;

-			memset(&pdir, 0, sizeof(Dir));

-			/* fill plan9 dir struct */

-			pdir.name = name;

-			len = (edir->name_len < EXT2_NAME_LEN) ? edir->name_len : EXT2_NAME_LEN;

-			strncpy(pdir.name, edir->name, len);

-			pdir.name[len] = 0;

-// chat("name %s len %d\n", pdir.name, edir->name_len);

-			pdir.uid = mapuid(tinode->i_uid);

-			pdir.gid = mapgid(tinode->i_gid);

-			pdir.qid.path = edir->inode;

-			pdir.mode = tinode->i_mode;

-			if( edir->inode == EXT2_ROOT_INODE )

-				pdir.qid.path = f->xf->rootqid.path;

-			else if( S_ISDIR( tinode->i_mode) )

-				pdir.qid.type |= QTDIR;

-			if( pdir.qid.type & QTDIR )

-				pdir.mode |= DMDIR;

-			pdir.length = tinode->i_size;

-			pdir.atime = tinode->i_atime;

-			pdir.mtime = tinode->i_mtime;

-			putbuf(tbuf);

-			dirlen = convD2M(&pdir, &buf[rcount], count-rcount);

-			if ( dirlen <= BIT16SZ ) {

-				chat("] ...");

-				putbuf(buffer);

-				putbuf(ibuf);

-				return rcount;

-			}

-			rcount += dirlen;

-			f->dirindex++;

-		}

-		putbuf(buffer);

-	}

-	chat("] ...");

-	putbuf(ibuf);

-	return rcount;

-}

-int

-bmap( Xfile *f, int block )

-{

-	Xfs *xf = f->xf;

-	Inode *inode;

-	Iobuf *buf, *ibuf;

-	int addr;

-	int addr_per_block = xf->addr_per_block;

-	int addr_per_block_bits = ffz(~addr_per_block);

-	if(block < 0) {

-		chat("bmap() block < 0 ...");

-		return 0;

-	}

-	if(block >= EXT2_NDIR_BLOCKS + addr_per_block +

-		(1 << (addr_per_block_bits * 2)) +

-		((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) {

-		chat("bmap() block > big...");

-		return 0;

-	}

-	ibuf = getbuf(xf, f->bufaddr);

-	if( !ibuf )

-		return 0;

-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-	/* direct blocks */

-	if(block < EXT2_NDIR_BLOCKS){

-		putbuf(ibuf);

-		return inode->i_block[block];

-	}

-	block -= EXT2_NDIR_BLOCKS;

-	/* indirect blocks*/

-	if(block < addr_per_block) {

-		addr = inode->i_block[EXT2_IND_BLOCK];

-		if (!addr) goto error;

-		buf = getbuf(xf, addr);

-		if( !buf ) goto error;

-		addr = *(((uint *)buf->iobuf) + block);

-		putbuf(buf);

-		putbuf(ibuf);

-		return addr;

-	}

-	block -= addr_per_block;

-	/* double indirect blocks */

-	if(block < (1 << (addr_per_block_bits * 2))) {

-		addr = inode->i_block[EXT2_DIND_BLOCK];

-		if (!addr) goto error;

-		buf = getbuf(xf, addr);

-		if( !buf ) goto error;

-		addr = *(((uint *)buf->iobuf) + (block >> addr_per_block_bits));

-		putbuf(buf);

-		buf = getbuf(xf, addr);

-		if( !buf ) goto error;

-		addr = *(((uint *)buf->iobuf) + (block & (addr_per_block - 1)));

-		putbuf(buf);

-		putbuf(ibuf);

-		return addr;

-	}

-	block -= (1 << (addr_per_block_bits * 2));

-	/* triple indirect blocks */

-	addr = inode->i_block[EXT2_TIND_BLOCK];

-	if(!addr) goto error;

-	buf = getbuf(xf, addr);

-	if( !buf ) goto error;

-	addr = *(((uint *)buf->iobuf) + (block >> (addr_per_block_bits * 2)));

-	putbuf(buf);

-	if(!addr) goto error;

-	buf = getbuf(xf, addr);

-	if( !buf ) goto error;

-	addr = *(((uint *)buf->iobuf) +

-			((block >> addr_per_block_bits) & (addr_per_block - 1)));

-	putbuf(buf);

-	if(!addr) goto error;

-	buf = getbuf(xf, addr);

-	if( !buf ) goto error;

-	addr = *(((uint *)buf->iobuf) + (block & (addr_per_block - 1)));

-	putbuf(buf);

-	putbuf(ibuf);

-	return addr;

-error:

-	putbuf(ibuf);

-	return 0;

-}

-long

-writefile(Xfile *f, void *vbuf, vlong offset, long count)

-{

-	Xfs *xf = f->xf;

-	Inode *inode;

-	Iobuf *buffer, *ibuf;

-	long w;

-	int len, o, cur_block, baddr;

-	char *buf;

-	buf = vbuf;

-	ibuf = getbuf(xf, f->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-	chat("write block [ ");

-	cur_block = offset / xf->block_size;

-	o = offset % xf->block_size;

-	w = 0;

-	while( count > 0 ){

-		baddr = getblk(f, cur_block++);

-		if( baddr <= 0 )

-			goto end;

-		buffer = getbuf(xf, baddr);

-		if( !buffer )

-			goto end;

-		chat("%d ", baddr);

-		len = xf->block_size - o;

-		if( len > count )

-			len = count;

-		memcpy(&buffer->iobuf[o], &buf[w], len);

-		dirtybuf(buffer);

-		w += len;

-		count -= len;

-		o = 0;

-		putbuf(buffer);

-	}

-end:

-	if( inode->i_size < offset + w )

-		inode->i_size = offset + w;

-	inode->i_atime = inode->i_mtime = time(0);

-	dirtybuf(ibuf);

-	putbuf(ibuf);

-	chat("]...");

-	if( errno )

-		return -1;

-	return w;

-}

-int

-new_block( Xfile *f, int goal )

-{

-	Xfs *xf= f->xf;

-	int group, block, baddr, k, redo;

-	ulong lmap;

-	char *p, *r;

-	Iobuf *buf;

-	Ext2 ed, es, eb;

-	es = getext2(xf, EXT2_SUPER, 0);

-	redo = 0;

-repeat:

-	if( goal < es.u.sb->s_first_data_block || goal >= es.u.sb->s_blocks_count )

-		goal = es.u.sb->s_first_data_block;

-	group = (goal - es.u.sb->s_first_data_block) / xf->blocks_per_group;

-	ed = getext2(xf, EXT2_DESC, group);

-	eb = getext2(xf, EXT2_BBLOCK, group);

-	/*

-	 * First, test if goal block is free

-	 */

-	if( ed.u.gd->bg_free_blocks_count > 0 ){

-		block = (goal - es.u.sb->s_first_data_block) % xf->blocks_per_group;

-		if( !test_bit(block, eb.u.bmp) )

-			goto got_block;

-		if( block ){

-			/*

-			 * goal wasn't free ; search foward for a free

-			 * block within the next 32 blocks

-			*/

-			lmap = (((ulong *)eb.u.bmp)[block>>5]) >>

-					((block & 31) + 1);

-			if( block < xf->blocks_per_group - 32 )

-				lmap |= (((ulong *)eb.u.bmp)[(block>>5)+1]) <<

-					( 31-(block & 31) );

-			else

-				lmap |= 0xffffffff << ( 31-(block & 31) );

-			if( lmap != 0xffffffffl ){

-				k = ffz(lmap) + 1;

-				if( (block + k) < xf->blocks_per_group ){

-					block += k;

-					goto got_block;

-				}

-			}

-		}

-		/*

-		 * Search in the remaider of the group

-		*/

-		p = eb.u.bmp + (block>>3);

-		r = memscan(p, 0, (xf->blocks_per_group - block + 7) >>3);

-		k = ( r - eb.u.bmp )<<3;

-		if( k < xf->blocks_per_group ){

-			block = k;

-			goto search_back;

-		}

-		k = find_next_zero_bit((unsigned long *)eb.u.bmp,

-						xf->blocks_per_group>>3, block);

-		if( k < xf->blocks_per_group ){

-			block = k;

-			goto got_block;

-		}

-	}

-	/*

-	 * Search the rest of groups

-	*/

-	putext2(ed); putext2(eb);

-	for(k=0 ; k < xf->ngroups ; k++){

-		group++;

-		if( group >= xf->ngroups )

-			group = 0;

-		ed = getext2(xf, EXT2_DESC, group);

-		if( ed.u.gd->bg_free_blocks_count > 0 )

-			break;

-		putext2(ed);

-	}

-	if( redo && group == xf->ngroups-1 ){

-		putext2(ed);

-		goto full;

-	}

-	if( k >=xf->ngroups ){

-		/*

-		 * All groups are full or

-		 * we have retry (because the last block) and all other

-		 * groups are also full.

-		*/

-full:

-		chat("no free blocks ...");

-	 	putext2(es);

-		errno = Enospace;

-		return 0;

-	}

-	eb = getext2(xf, EXT2_BBLOCK, group);

-	r = memscan(eb.u.bmp,  0, xf->blocks_per_group>>3);

-	block = (r - eb.u.bmp) <<3;

-	if( block < xf->blocks_per_group )

-		goto search_back;

-	else

-		block = find_first_zero_bit((ulong *)eb.u.bmp,

-								xf->blocks_per_group>>3);

-	if( block >= xf->blocks_per_group ){

-		chat("Free block count courupted for block group %d...", group);

-		putext2(ed); putext2(eb); putext2(es);

-		errno = Ecorrupt;

-		return 0;

-	}

-search_back:

-	/*

-	 * A free byte was found in the block. Now search backwards up

-	 * to 7 bits to find the start of this group of free block.

-	*/

-	for(k=0 ; k < 7 && block > 0 &&

-		!test_bit(block-1, eb.u.bmp) ; k++, block--);

-got_block:

-	baddr = block + (group * xf->blocks_per_group) +

-				es.u.sb->s_first_data_block;

-	if( baddr == ed.u.gd->bg_block_bitmap ||

-	     baddr == ed.u.gd->bg_inode_bitmap ){

-		chat("Allocating block in system zone...");

-		putext2(ed); putext2(eb); putext2(es);

-		errno = Eintern;

-		return 0;

-	}

-	if( set_bit(block, eb.u.bmp) ){

-		chat("bit already set (%d)...", block);

-		putext2(ed); putext2(eb); putext2(es);

-		errno = Ecorrupt;

-		return 0;

-	}

-	dirtyext2(eb);

-	if( baddr >= es.u.sb->s_blocks_count ){

-		chat("block >= blocks count...");

-		errno = Eintern;

-error:

-		clear_bit(block, eb.u.bmp);

-		putext2(eb); putext2(ed); putext2(es);

-		return 0;

-	}

-	buf = getbuf(xf, baddr);

-	if( !buf ){

-		if( !redo ){

-			/*

-			 * It's perhaps the last block of the disk and

-			 * it can't be acceded because the last sector.

-			 * Therefore, we try one more time with goal at 0

-			 * to force scanning all groups.

-			*/

-			clear_bit(block, eb.u.bmp);

-			putext2(eb); putext2(ed);

-			goal = 0; errno = 0; redo++;

-			goto repeat;

-		}

-		goto error;

-	}

-	memset(&buf->iobuf[0], 0, xf->block_size);

-	dirtybuf(buf);

-	putbuf(buf);

-	es.u.sb->s_free_blocks_count--;

-	dirtyext2(es);

-	ed.u.gd->bg_free_blocks_count--;

-	dirtyext2(ed);

-	putext2(eb);

-	putext2(ed);

-	putext2(es);

-	chat("new ");

-	return baddr;

-}

-int

-getblk(Xfile *f, int block)

-{

-	Xfs *xf = f->xf;

-	int baddr;

-	int addr_per_block = xf->addr_per_block;

-	if (block < 0) {

-		chat("getblk() block < 0 ...");

-		return 0;

-	}

-	if(block > EXT2_NDIR_BLOCKS + addr_per_block +

-			addr_per_block * addr_per_block +

-			addr_per_block * addr_per_block * addr_per_block ){

-		chat("getblk() block > big...");

-		errno = Eintern;

-		return 0;

-	}

-	if( block < EXT2_NDIR_BLOCKS )

-		return inode_getblk(f, block);

-	block -= EXT2_NDIR_BLOCKS;

-	if( block < addr_per_block ){

-		baddr = inode_getblk(f, EXT2_IND_BLOCK);

-		baddr = block_getblk(f, baddr, block);

-		return baddr;

-	}

-	block -= addr_per_block;

-	if( block < addr_per_block * addr_per_block  ){

-		baddr = inode_getblk(f, EXT2_DIND_BLOCK);

-		baddr = block_getblk(f, baddr, block / addr_per_block);

-		baddr = block_getblk(f, baddr, block & ( addr_per_block-1));

-		return baddr;

-	}

-	block -= addr_per_block * addr_per_block;

-	baddr = inode_getblk(f, EXT2_TIND_BLOCK);

-	baddr = block_getblk(f, baddr, block / (addr_per_block * addr_per_block));

-	baddr = block_getblk(f, baddr, (block / addr_per_block) & ( addr_per_block-1));

-	return block_getblk(f, baddr, block & ( addr_per_block-1));

-}

-int

-block_getblk(Xfile *f, int rb, int nr)

-{

-	Xfs *xf = f->xf;

-	Inode *inode;

-	int tmp, goal = 0;

-	int blocks = xf->block_size / 512;

-	Iobuf *buf, *ibuf;

-	uint *p;

-	Ext2 es;

-	if( !rb )

-		return 0;

-	buf = getbuf(xf, rb);

-	if( !buf )

-		return 0;

-	p = (uint *)(buf->iobuf) + nr;

-	if( *p ){

-		tmp = *p;

-		putbuf(buf);

-		return tmp;

-	}

-	for(tmp=nr - 1 ; tmp >= 0 ; tmp--){

-		if( ((uint *)(buf->iobuf))[tmp] ){

-			goal = ((uint *)(buf->iobuf))[tmp];

-			break;

-		}

-	}

-	if( !goal ){

-		es = getext2(xf, EXT2_SUPER, 0);

-		goal = (((f->inbr -1) / xf->inodes_per_group) *

-				xf->blocks_per_group) +

-				es.u.sb->s_first_data_block;

-		putext2(es);

-	}

-	tmp = new_block(f, goal);

-	if( !tmp ){

-		putbuf(buf);

-		return 0;

-	}

-	*p = tmp;

-	dirtybuf(buf);

-	putbuf(buf);

-	ibuf = getbuf(xf, f->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-	inode->i_blocks += blocks;

-	dirtybuf(ibuf);

-	putbuf(ibuf);

-	return tmp;

-}

-int

-inode_getblk(Xfile *f, int block)

-{

-	Xfs *xf = f->xf;

-	Inode *inode;

-	Iobuf *ibuf;

-	int tmp, goal = 0;

-	int blocks = xf->block_size / 512;

-	Ext2 es;

-	ibuf = getbuf(xf, f->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-	if( inode->i_block[block] ){

-		putbuf(ibuf);

-		return inode->i_block[block];

-	}

-	for(tmp=block - 1 ; tmp >= 0 ; tmp--){

-		if( inode->i_block[tmp] ){

-			goal = inode->i_block[tmp];

-			break;

-		}

-	}

-	if( !goal ){

-		es = getext2(xf, EXT2_SUPER, 0);

-		goal = (((f->inbr -1) / xf->inodes_per_group) *

-				xf->blocks_per_group) +

-				es.u.sb->s_first_data_block;

-		putext2(es);

-	}

-	tmp = new_block(f, goal);

-	if( !tmp ){

-		putbuf(ibuf);

-		return 0;

-	}

-	inode->i_block[block] = tmp;

-	inode->i_blocks += blocks;

-	dirtybuf(ibuf);

-	putbuf(ibuf);

-	return tmp;

-}

-int

-new_inode(Xfile *f, int mode)

-{

-	Xfs *xf = f->xf;

-	Inode *inode, *finode;

-	Iobuf *buf, *ibuf;

-	int ave,group, i, j;

-	Ext2 ed, es, eb;

-	group = -1;

-	es = getext2(xf, EXT2_SUPER, 0);

-	if( S_ISDIR(mode) ){	/* create directory inode */

-		ave = es.u.sb->s_free_inodes_count / xf->ngroups;

-		for(i=0 ; i < xf->ngroups ; i++){

-			ed = getext2(xf, EXT2_DESC, i);

-			if( ed.u.gd->bg_free_inodes_count &&

-					ed.u.gd->bg_free_inodes_count >= ave ){

-				if( group<0 || ed.u.gd->bg_free_inodes_count >

-								ed.u.gd->bg_free_inodes_count )

-					group = i;

-			}

-			putext2(ed);

-		}

-	}else{		/* create file inode */

-		/* Try to put inode in its parent directory */

-		i = (f->inbr -1) / xf->inodes_per_group;

-		ed = getext2(xf, EXT2_DESC, i);

-		if( ed.u.gd->bg_free_inodes_count ){

-			group = i;

-			putext2(ed);

-		}else{

-			/*

-			 * Use a quadratic hash to find a group whith

-			 * a free inode

-			 */

-			putext2(ed);

-			for( j=1 ; j < xf->ngroups ; j <<= 1){

-				i += j;

-				if( i >= xf->ngroups )

-					i -= xf->ngroups;

-				ed = getext2(xf, EXT2_DESC, i);

-				if( ed.u.gd->bg_free_inodes_count ){

-					group = i;

-					putext2(ed);

-					break;

-				}

-				putext2(ed);

-			}

-		}

-		if( group < 0 ){

-			/* try a linear search */

-			i = ((f->inbr -1) / xf->inodes_per_group) + 1;

-			for(j=2 ; j < xf->ngroups ; j++){

-				if( ++i >= xf->ngroups )

-					i = 0;

-				ed = getext2(xf, EXT2_DESC, i);

-				if( ed.u.gd->bg_free_inodes_count ){

-					group = i;

-					putext2(ed);

-					break;

-				}

-				putext2(ed);

-			}

-		}

-	}

-	if( group < 0 ){

-		chat("group < 0...");

-		putext2(es);

-		return 0;

-	}

-	ed = getext2(xf, EXT2_DESC, group);

-	eb = getext2(xf, EXT2_BINODE, group);

-	if( (j = find_first_zero_bit(eb.u.bmp,

-			xf->inodes_per_group>>3)) < xf->inodes_per_group){

-		if( set_bit(j, eb.u.bmp) ){

-			chat("inode %d of group %d is already allocated...", j, group);

-			putext2(ed); putext2(eb); putext2(es);

-			errno = Ecorrupt;

-			return 0;

-		}

-		dirtyext2(eb);

-	}else if( ed.u.gd->bg_free_inodes_count != 0 ){

-		chat("free inodes count corrupted for group %d...", group);

-		putext2(ed); putext2(eb); putext2(es);

-		errno = Ecorrupt;

-		return 0;

-	}

-	i = j;

-	j += group * xf->inodes_per_group + 1;

-	if( j < EXT2_FIRST_INO || j >= es.u.sb->s_inodes_count ){

-		chat("reserved inode or inode > inodes count...");

-		errno = Ecorrupt;

-error:

-		clear_bit(i, eb.u.bmp);

-		putext2(eb); putext2(ed); putext2(es);

-		return 0;

-	}

-	buf = getbuf(xf, ed.u.gd->bg_inode_table +

-			(((j-1) % xf->inodes_per_group) /

-			xf->inodes_per_block));

-	if( !buf )

-		goto error;

-	inode = ((struct Inode *) buf->iobuf) +

-		((j-1) % xf->inodes_per_block);

-	memset(inode, 0, sizeof(Inode));

-	inode->i_mode = mode;

-	inode->i_links_count = 1;

-	inode->i_uid = DEFAULT_UID;

-	inode->i_gid = DEFAULT_GID;

-	inode->i_mtime = inode->i_atime = inode->i_ctime = time(0);

-	dirtybuf(buf);

-	ibuf = getbuf(xf, f->bufaddr);

-	if( !ibuf ){

-		putbuf(buf);

-		goto error;

-	}

-	finode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-	inode->i_flags = finode->i_flags;

-	inode->i_uid = finode->i_uid;

-	inode->i_gid = finode->i_gid;

-	dirtybuf(ibuf);

-	putbuf(ibuf);

-	putbuf(buf);

-	ed.u.gd->bg_free_inodes_count--;

-	if( S_ISDIR(mode) )

-		ed.u.gd->bg_used_dirs_count++;

-	dirtyext2(ed);

-	es.u.sb->s_free_inodes_count--;

-	dirtyext2(es);

-	putext2(eb);

-	putext2(ed);

-	putext2(es);

-	return j;

-}

-int

-create_file(Xfile *fdir, char *name, int mode)

-{

-	int inr;

-	inr = new_inode(fdir, mode);

-	if( !inr ){

-		chat("create one new inode failed...");

-		return -1;

-	}

-	if( add_entry(fdir, name, inr) < 0 ){

-		chat("add entry failed...");

-		free_inode(fdir->xf, inr);

-		return -1;

-	}

-	return inr;

-}

-void

-free_inode( Xfs *xf, int inr)

-{

-	Inode *inode;

-	ulong b, bg;

-	Iobuf *buf;

-	Ext2 ed, es, eb;

-	bg = (inr -1) / xf->inodes_per_group;

-	b = (inr -1) % xf->inodes_per_group;

-	ed = getext2(xf, EXT2_DESC, bg);

-	buf = getbuf(xf, ed.u.gd->bg_inode_table +

-			(b / xf->inodes_per_block));

-	if( !buf ){

-		putext2(ed);

-		return;

-	}

-	inode = ((struct Inode *) buf->iobuf) +

-		((inr-1) % xf->inodes_per_block);

-	if( S_ISDIR(inode->i_mode) )

-		ed.u.gd->bg_used_dirs_count--;

-	memset(inode, 0, sizeof(Inode));

-	inode->i_dtime = time(0);

-	dirtybuf(buf);

-	putbuf(buf);

-	ed.u.gd->bg_free_inodes_count++;

-	dirtyext2(ed);

-	putext2(ed);

-	eb = getext2(xf, EXT2_BINODE, bg);

-	clear_bit(b, eb.u.bmp);

-	dirtyext2(eb);

-	putext2(eb);

-	es = getext2(xf, EXT2_SUPER, 0);

-	es.u.sb->s_free_inodes_count++;

-	dirtyext2(es); putext2(es);

-}

-int

-create_dir(Xfile *fdir, char *name, int mode)

-{

-	Xfs *xf = fdir->xf;

-	DirEntry *de;

-	Inode *inode;

-	Iobuf *buf, *ibuf;

-	Xfile tf;

-	int inr, baddr;

-	inr = new_inode(fdir, mode);

-	if( inr == 0 ){

-		chat("create one new inode failed...");

-		return -1;

-	}

-	if( add_entry(fdir, name, inr) < 0 ){

-		chat("add entry failed...");

-		free_inode(fdir->xf, inr);

-		return -1;

-	}

-	/* create the empty dir */

-	tf = *fdir;

-	if( get_inode(&tf, inr) < 0 ){

-		chat("can't get inode %d...", inr);

-		free_inode(fdir->xf, inr);

-		return -1;

-	}

-	ibuf = getbuf(xf, tf.bufaddr);

-	if( !ibuf ){

-		free_inode(fdir->xf, inr);

-		return -1;

-	}

-	inode = ((Inode *)ibuf->iobuf) + tf.bufoffset;

-	baddr = inode_getblk(&tf, 0);

-	if( !baddr ){

-		putbuf(ibuf);

-		ibuf = getbuf(xf, fdir->bufaddr);

-		if( !ibuf ){

-			free_inode(fdir->xf, inr);

-			return -1;

-		}

-		inode = ((Inode *)ibuf->iobuf) + fdir->bufoffset;

-		delete_entry(fdir->xf, inode, inr);

-		putbuf(ibuf);

-		free_inode(fdir->xf, inr);

-		return -1;

-	}

-	inode->i_size = xf->block_size;

-	buf = getbuf(xf, baddr);

-	de = (DirEntry *)buf->iobuf;

-	de->inode = inr;

-	de->name_len = 1;

-	de->rec_len = DIR_REC_LEN(de->name_len);

-	strcpy(de->name, ".");

-	de = (DirEntry *)( (char *)de + de->rec_len);

-	de->inode = fdir->inbr;

-	de->name_len = 2;

-	de->rec_len = xf->block_size - DIR_REC_LEN(1);

-	strcpy(de->name, "..");

-	dirtybuf(buf);

-	putbuf(buf);

-	inode->i_links_count = 2;

-	dirtybuf(ibuf);

-	putbuf(ibuf);

-	ibuf = getbuf(xf, fdir->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + fdir->bufoffset;

-	inode->i_links_count++;

-	dirtybuf(ibuf);

-	putbuf(ibuf);

-	return inr;

-}

-int

-add_entry(Xfile *f, char *name, int inr)

-{

-	Xfs *xf = f->xf;

-	DirEntry *de, *de1;

-	int offset, baddr;

-	int rec_len, cur_block;

-	int namelen = strlen(name);

-	Inode *inode;

-	Iobuf *buf, *ibuf;

-	ibuf = getbuf(xf, f->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-	if( inode->i_size == 0 ){

-		chat("add_entry() no entry !!!...");

-		putbuf(ibuf);

-		return -1;

-	}

-	cur_block = offset = 0;

-	rec_len = DIR_REC_LEN(namelen);

-	buf = getbuf(xf, inode->i_block[cur_block++]);

-	if( !buf ){

-		putbuf(ibuf);

-		return -1;

-	}

-	de = (DirEntry *)buf->iobuf;

-	for(;;){

-		if( ((char *)de) >= (xf->block_size + buf->iobuf) ){

-			putbuf(buf);

-			if( cur_block >= EXT2_NDIR_BLOCKS ){

-				errno = Enospace;

-				putbuf(ibuf);

-				return -1;

-			}

-			if( (baddr = inode_getblk(f, cur_block++)) == 0 ){

-				putbuf(ibuf);

-				return -1;

-			}

-			buf = getbuf(xf, baddr);

-			if( !buf ){

-				putbuf(ibuf);

-				return -1;

-			}

-			if( inode->i_size <= offset ){

-				de  = (DirEntry *)buf->iobuf;

-				de->inode = 0;

-				de->rec_len = xf->block_size;

-				dirtybuf(buf);

-				inode->i_size = offset + xf->block_size;

-				dirtybuf(ibuf);

-			}else{

-				de = (DirEntry *)buf->iobuf;

-			}

-		}

-		if( de->inode != 0 && de->name_len == namelen &&

-				!strncmp(name, de->name, namelen) ){

-			errno = Eexist;

-			putbuf(ibuf); putbuf(buf);

-			return -1;

-		}

-		offset += de->rec_len;

-		if( (de->inode == 0 && de->rec_len >= rec_len) ||

-				(de->rec_len >= DIR_REC_LEN(de->name_len) + rec_len) ){

-			if( de->inode ){

-				de1 = (DirEntry *) ((char *)de + DIR_REC_LEN(de->name_len));

-				de1->rec_len = de->rec_len - DIR_REC_LEN(de->name_len);

-				de->rec_len = DIR_REC_LEN(de->name_len);

-				de = de1;

-			}

-			de->inode = inr;

-			de->name_len = namelen;

-			memcpy(de->name, name, namelen);

-			dirtybuf(buf);

-			putbuf(buf);

-			inode->i_mtime = inode->i_ctime = time(0);

-			dirtybuf(ibuf);

-			putbuf(ibuf);

-			return 0;

-		}

-		de = (DirEntry *)((char *)de + de->rec_len);

-	}

-	/* not reached */

-}

-int

-unlink( Xfile *file )

-{

-	Xfs *xf = file->xf;

-	Inode *dir;

-	int bg, b;

-	Inode *inode;

-	Iobuf *buf, *ibuf;

-	Ext2 ed, es, eb;

-	if( S_ISDIR(getmode(file)) && !empty_dir(file) ){

-			chat("non empty directory...");

-			errno = Eperm;

-			return -1;

-	}

-	es = getext2(xf, EXT2_SUPER, 0);

-	/* get dir inode */

-	if( file->pinbr >= es.u.sb->s_inodes_count ){

-    		chat("inode number %d is too big...",  file->pinbr);

-		putext2(es);

-		errno = Eintern;

-    		return -1;

-	}

-	bg = (file->pinbr - 1) / xf->inodes_per_group;

-	if( bg >= xf->ngroups ){

-		chat("block group (%d) > groups count...", bg);

-		putext2(es);

-		errno = Eintern;

-		return -1;

-	}

-	ed = getext2(xf, EXT2_DESC, bg);

-	b = ed.u.gd->bg_inode_table +

-			(((file->pinbr-1) % xf->inodes_per_group) /

-			xf->inodes_per_block);

-	putext2(ed);

-	buf = getbuf(xf, b);

-	if( !buf ){

-		putext2(es);

-		return -1;

-	}

-	dir = ((struct Inode *) buf->iobuf) +

-		((file->pinbr-1) % xf->inodes_per_block);

-	/* Clean dir entry */

-	if( delete_entry(xf, dir, file->inbr) < 0 ){

-		putbuf(buf);

-		putext2(es);

-		return -1;

-	}

-	if( S_ISDIR(getmode(file)) ){

-		dir->i_links_count--;

-		dirtybuf(buf);

-	}

-	putbuf(buf);

-	/* clean blocks */

-	ibuf = getbuf(xf, file->bufaddr);

-	if( !ibuf ){

-		putext2(es);

-		return -1;

-	}

-	inode = ((Inode *)ibuf->iobuf) + file->bufoffset;

-	if( !S_ISLNK(getmode(file)) ||

-		(S_ISLNK(getmode(file)) && (inode->i_size > EXT2_N_BLOCKS<<2)) )

-		if( free_block_inode(file) < 0 ){

-			chat("error while freeing blocks...");

-			putext2(es);

-			putbuf(ibuf);

-			return -1;

-		}

-	/* clean inode */

-	bg = (file->inbr -1) / xf->inodes_per_group;

-	b = (file->inbr -1) % xf->inodes_per_group;

-	eb = getext2(xf, EXT2_BINODE, bg);

-	clear_bit(b, eb.u.bmp);

-	dirtyext2(eb);

-	putext2(eb);

-	inode->i_dtime = time(0);

-	inode->i_links_count--;

-	if( S_ISDIR(getmode(file)) )

-		inode->i_links_count = 0;

-	es.u.sb->s_free_inodes_count++;

-	dirtyext2(es);

-	putext2(es);

-	ed = getext2(xf, EXT2_DESC, bg);

-	ed.u.gd->bg_free_inodes_count++;

-	if( S_ISDIR(getmode(file)) )

-		ed.u.gd->bg_used_dirs_count--;

-	dirtyext2(ed);

-	putext2(ed);

-	dirtybuf(ibuf);

-	putbuf(ibuf);

-	return 1;

-}

-int

-empty_dir(Xfile *dir)

-{

-	Xfs *xf = dir->xf;

-	int nblock;

-	uint offset, i,count;

-	DirEntry *de;

-	Inode *inode;

-	Iobuf *buf, *ibuf;

-	if( !S_ISDIR(getmode(dir)) )

-		return 0;

-	ibuf = getbuf(xf, dir->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + dir->bufoffset;

-	nblock = (inode->i_blocks * 512) / xf->block_size;

-	for(i=0, count=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){

-		buf = getbuf(xf, inode->i_block[i]);

-		if( !buf ){

-			putbuf(ibuf);

-			return 0;

-		}

-		for(offset=0 ; offset < xf->block_size ;  ){

-			de = (DirEntry *)(buf->iobuf + offset);

-			if(de->inode)

-				count++;

-			offset += de->rec_len;

-		}

-		putbuf(buf);

-		if( count > 2 ){

-			putbuf(ibuf);

-			return 0;

-		}

-	}

-	putbuf(ibuf);

-	return 1;

-}

-int

-free_block_inode(Xfile *file)

-{

-	Xfs *xf = file->xf;

-	int i, j, k;

-	ulong b, *y, *z;

-	uint *x;

-	int naddr;

-	Inode *inode;

-	Iobuf *buf, *buf1, *buf2, *ibuf;

-	ibuf = getbuf(xf, file->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + file->bufoffset;

-	for(i=0 ; i < EXT2_IND_BLOCK ; i++){

-		x = inode->i_block + i;

-		if( *x == 0 ){ putbuf(ibuf); return 0; }

-		free_block(xf, *x);

-	}

-	naddr = xf->addr_per_block;

-	/* indirect blocks */

-	if( (b=inode->i_block[EXT2_IND_BLOCK]) ){

-		buf = getbuf(xf, b);

-		if( !buf ){ putbuf(ibuf); return -1; }

-		for(i=0 ; i < naddr ; i++){

-			x = ((uint *)buf->iobuf) + i;

-			if( *x == 0 ) break;

-			free_block(xf, *x);

-		}

-		free_block(xf, b);

-		putbuf(buf);

-	}

-	/* double indirect block */

-	if( (b=inode->i_block[EXT2_DIND_BLOCK]) ){

-		buf = getbuf(xf, b);

-		if( !buf ){ putbuf(ibuf); return -1; }

-		for(i=0 ; i < naddr ; i++){

-			x = ((uint *)buf->iobuf) + i;

-			if( *x== 0 ) break;

-			buf1 = getbuf(xf, *x);

-			if( !buf1 ){ putbuf(buf); putbuf(ibuf); return -1; }

-			for(j=0 ; j < naddr ; j++){

-				y = ((ulong *)buf1->iobuf) + j;

-				if( *y == 0 ) break;

-				free_block(xf, *y);

-			}

-			free_block(xf, *x);

-			putbuf(buf1);

-		}

-		free_block(xf, b);

-		putbuf(buf);

-	}

-	/* triple indirect block */

-	if( (b=inode->i_block[EXT2_TIND_BLOCK]) ){

-		buf = getbuf(xf, b);

-		if( !buf ){ putbuf(ibuf); return -1; }

-		for(i=0 ; i < naddr ; i++){

-			x = ((uint *)buf->iobuf) + i;

-			if( *x == 0 ) break;

-			buf1 = getbuf(xf, *x);

-			if( !buf1 ){ putbuf(buf); putbuf(ibuf); return -1; }

-			for(j=0 ; j < naddr ; j++){

-				y = ((ulong *)buf1->iobuf) + j;

-				if( *y == 0 ) break;

-				buf2 = getbuf(xf, *y);

-				if( !buf2 ){ putbuf(buf); putbuf(buf1); putbuf(ibuf); return -1; }

-				for(k=0 ; k < naddr ; k++){

-					z = ((ulong *)buf2->iobuf) + k;

-					if( *z == 0 ) break;

-					free_block(xf, *z);

-				}

-				free_block(xf, *y);

-				putbuf(buf2);

-			}

-			free_block(xf, *x);

-			putbuf(buf1);

-		}

-		free_block(xf, b);

-		putbuf(buf);

-	}

-	putbuf(ibuf);

-	return 0;

-}

-void free_block( Xfs *xf, ulong block )

-{

-	ulong bg;

-	Ext2 ed, es, eb;

-	es = getext2(xf, EXT2_SUPER, 0);

-	bg = (block - es.u.sb->s_first_data_block) / xf->blocks_per_group;

-	block = (block - es.u.sb->s_first_data_block) % xf->blocks_per_group;

-	eb = getext2(xf, EXT2_BBLOCK, bg);

-	clear_bit(block, eb.u.bmp);

-	dirtyext2(eb);

-	putext2(eb);

-	es.u.sb->s_free_blocks_count++;

-	dirtyext2(es);

-	putext2(es);

-	ed = getext2(xf, EXT2_DESC, bg);

-	ed.u.gd->bg_free_blocks_count++;

-	dirtyext2(ed);

-	putext2(ed);

-}

-int

-delete_entry(Xfs *xf, Inode *inode, int inbr)

-{

-	int nblock = (inode->i_blocks * 512) / xf->block_size;

-	uint offset, i;

-	DirEntry *de, *pde;

-	Iobuf *buf;

-	if( !S_ISDIR(inode->i_mode) )

-		return -1;

-	for(i=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){

-		buf = getbuf(xf, inode->i_block[i]);

-		if( !buf )

-			return -1;

-		pde = 0;

-		for(offset=0 ; offset < xf->block_size ;  ){

-			de = (DirEntry *)(buf->iobuf + offset);

-			if( de->inode == inbr ){

-				if( pde )

-					pde->rec_len += de->rec_len;

-				de->inode = 0;

-				dirtybuf(buf);

-				putbuf(buf);

-				return 1;

-			}

-			offset += de->rec_len;

-			pde = de;

-		}

-		putbuf(buf);

-	}

-	errno = Enonexist;

-	return -1;

-}

-int

-truncfile(Xfile *f)

-{

-	Inode *inode;

-	Iobuf *ibuf;

-	chat("trunc(fid=%d) ...", f->fid);

-	ibuf = getbuf(f->xf, f->bufaddr);

-	if( !ibuf )

-		return -1;

-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;

-	if( free_block_inode(f) < 0 ){

-		chat("error while freeing blocks...");

-		putbuf(ibuf);

-		return -1;

-	}

-	inode->i_atime = inode->i_mtime = time(0);

-	inode->i_blocks = 0;

-	inode->i_size = 0;

-	memset(inode->i_block, 0, EXT2_N_BLOCKS*sizeof(ulong));

-	dirtybuf(ibuf);

-	putbuf(ibuf);

-	chat("trunc ok...");

-	return 0;

-}

-long

-getmode(Xfile *f)

-{

-	Iobuf *ibuf;

-	long mode;

-	ibuf = getbuf(f->xf, f->bufaddr);

-	if( !ibuf )

-		return -1;

-	mode = (((Inode *)ibuf->iobuf) + f->bufoffset)->i_mode;

-	putbuf(ibuf);

-	return mode;

-}

-void

-CleanSuper(Xfs *xf)

-{

-	Ext2 es;

-	es = getext2(xf, EXT2_SUPER, 0);

-	es.u.sb->s_state = EXT2_VALID_FS;

-	dirtyext2(es);

-	putext2(es);

-}

-int

-test_bit(int i, void *data)

-{

-	char *pt = (char *)data;

-	return pt[i>>3] & (0x01 << (i&7));

-}

-int

-set_bit(int i, void *data)

-{

-  	char *pt;

-  	if( test_bit(i, data) )

-    		return 1; /* bit already set !!! */

-  	pt = (char *)data;

-  	pt[i>>3] |= (0x01 << (i&7));

-  	return 0;

-}

-int

-clear_bit(int i, void *data)

-{

-	char *pt;

-  	if( !test_bit(i, data) )

-    		return 1; /* bit already clear !!! */

- 	 pt = (char *)data;

-  	pt[i>>3] &= ~(0x01 << (i&7));

-	return 0;

-}

-void *

-memscan( void *data, int c, int count )

-{

-	char *pt = (char *)data;

-	while( count ){

-		if( *pt == c )

-			return (void *)pt;

-		count--;

-		pt++;

-	}

-	return (void *)pt;

-}

-int

-find_first_zero_bit( void *data, int count /* in byte */)

-{

-  char *pt = (char *)data;

-  int n, i;

-  n = 0;

-  while( n < count ){

-    for(i=0 ; i < 8 ; i++)

-      if( !(*pt & (0x01 << (i&7))) )

-	return (n<<3) + i;

-    n++; pt++;

-  }

-  return n << 3;

-}

-int

-find_next_zero_bit( void *data, int count /* in byte */, int where)

-{

-  char *pt = (((char *)data) + (where >> 3));

-  int n, i;

-  n = where >> 3;

-  i = where & 7;

-  while( n < count ){

-    for(; i < 8 ; i++)

-      if( !(*pt & (0x01 << (i&7))) )

-	return (n<<3) + i;

-    n++; pt++; i=0;

-  }

-  return n << 3;

-}

-int

-ffz( int x )

-{

-	int c = 0;

-	while( x&1 ){

-		c++;

-		x >>= 1;

-	}

-	return c;

-}

--- a/sys/src/cmd/ext2srv/fns.h

+++ /dev/null

@@ -1,70 +1,0 @@

-void	chat(char*, ...);

-Xfile *	clean(Xfile*);

-void	dirdump(void*);

-int	dosfs(Xfs*);

-int	emptydir(Xfile*);

-int	falloc(Xfs*);

-int	fileaddr(Xfile*, int, int);

-int	getfat(Xfs*, int);

-int	getfile(Xfile*);

-Xfs *	getxfs(char*);

-void	panic(char*, ...);

-void	putfat(Xfs*, int, int);

-void	putfile(Xfile*);

-void	refxfs(Xfs*, int);

-long	writefile(Xfile*, void*, vlong, long);

-char *	xerrstr(int);

-Xfile *	xfile(Fid*, int);

-int	xfspurge(void);

-int ext2fs(Xfs *);

-int get_inode( Xfile *, uint);

-char *getname(Xfile *, char *);

-int get_file(Xfile *, char *);

-int bmap( Xfile *f, int block );

-int ffz(int);

-long	readdir(Xfile*, void*, vlong, long);

-long	readfile(Xfile*, void*, vlong, long);

-void dostat(Qid, Xfile *, Dir *);

-int new_block( Xfile *, int);

-int test_bit(int, void *);

-int set_bit(int, void *);

-int  clear_bit(int , void *);

-void *memscan(void *, int, int);

-int find_first_zero_bit(void *, int);

-int find_next_zero_bit(void *, int, int);

-int block_getblk(Xfile *, int, int);

-int inode_getblk(Xfile *, int);

-int getblk(Xfile *, int);

-int  new_inode(Xfile *, int);

-int add_entry(Xfile *, char *, int);

-int create_file(Xfile *, char *, int);

-int create_dir(Xfile *, char *, int);

-int unlink(Xfile *);

-int  delete_entry(Xfs *, Inode *, int);

-int  free_block_inode(Xfile *);

-void free_block( Xfs *, ulong);

-void free_inode( Xfs *, int);

-int empty_dir(Xfile *);

-int truncfile(Xfile *);

-int dowstat(Xfile *, Dir *);

-long getmode(Xfile *);

-Ext2 getext2(Xfs *, char, int);

-void CleanSuper(Xfs *);

-/* Iobuf operations */

-Iobuf *getbuf(Xfs *, long addr);

-void putbuf(Iobuf *);

-void purgebuf(Xfs *);

-void iobuf_init(void);

-int xread(Xfs *, Iobuf *, long);

-void syncbuf(void);

-void xwrite(Iobuf *);

-void dirtybuf(Iobuf *);

-void mchat(char *fmt, ...);

-void dumpbuf(void);

-void gidfile(char*);

-void uidfile(char*);

--- a/sys/src/cmd/ext2srv/iobuf.c

+++ /dev/null

@@ -1,174 +1,0 @@

-#include <u.h>

-#include <libc.h>

-#include <fcall.h>

-#include <thread.h>

-#include <9p.h>

-#include "dat.h"

-#include "fns.h"

-#define	NIOBUF		100

-#define	HIOB		(NIOBUF/3)

-static Iobuf*	hiob[HIOB];		/* hash buckets */

-static Iobuf	iobuf[NIOBUF];		/* buffer headers */

-static Iobuf*	iohead;

-static Iobuf*	iotail;

-Iobuf*

-getbuf(Xfs *dev, long addr)

-{

-	Iobuf *p, *h, **l, **f;

-	l = &hiob[addr%HIOB];

-	for(p = *l; p; p = p->hash) {

-		if(p->addr == addr && p->dev == dev) {

-			p->busy++;

-			return p;

-		}

-	}

-	/* Find a non-busy buffer from the tail */

-	for(p = iotail; p && (p->busy > 0); p = p->prev)

-		;

-	if(!p)

-		panic("all buffers busy");

-	if(p->dirty){

-		xwrite(p);

-		p->dirty = 0;

-	}

-	if( xread(dev, p, addr) < 0)

-		return 0;

-	/* Delete from hash chain */

-	f = &hiob[p->addr%HIOB];

-	if( *f == p )

-		*f = p->hash;

-	else {

-		for(h = *f; h ; h = h->hash)

-			if( h->hash == p ){

-				h->hash = p->hash;

-				break;

-			}

-	}

-	/* Fill and hash */

-	p->hash = *l;

-	*l = p;

-	p->addr = addr;

-	p->dev = dev;

-	p->busy=1;

-	return p;

-}

-void

-putbuf(Iobuf *p)

-{

-	if(p->busy <= 0)

-		panic("putbuf");

-	p->busy--;

-	/* Link onto head for lru */

-	if(p == iohead)

-		return;

-	if( p == iotail ){

-		p->prev->next = 0;

-		iotail = p->prev;

-	}else{

-		p->prev->next = p->next;

-		p->next->prev = p->prev;

-	}

-	p->prev = 0;

-	p->next = iohead;

-	iohead->prev = p;

-	iohead = p;

-}

-void

-dirtybuf(Iobuf *p)

-{

-	if(p->busy <=0)

-		panic("dirtybuf");

-	p->dirty = 1;

-}

-void

-syncbuf(void)

-{

-	Iobuf *p;

-	for(p=&iobuf[0] ; p<&iobuf[NIOBUF]; p++)

-		if( p->dirty ){

-			xwrite(p);

-			p->dirty = 0;

-		}

-}

-void

-purgebuf(Xfs *dev)

-{

-	Iobuf *p;

-	for(p=&iobuf[0]; p<&iobuf[NIOBUF]; p++)

-		if(p->dev == dev)

-			p->busy = 0;

-	/* Blow hash chains */

-	memset(hiob, 0, sizeof(hiob));

-}

-void

-iobuf_init(void)

-{

-	Iobuf *p;

-	iohead = iobuf;

-	iotail = iobuf+NIOBUF-1;

-	for(p = iobuf; p <= iotail; p++) {

-		p->next = p+1;

-		p->prev = p-1;

-		p->iobuf = (char *)malloc(EXT2_MAX_BLOCK_SIZE);

-		if(p->iobuf == 0)

-			panic("iobuf_init");

-	}

-	iohead->prev = 0;

-	iotail->next = 0;

-}

-int

-xread(Xfs *dev, Iobuf *p, long addr)

-{

-	/*chat("xread %d,%d...", dev->dev, addr);*/

-	seek(dev->dev, (vlong)addr*dev->block_size, 0);

-	if(read(dev->dev, p->iobuf, dev->block_size) != dev->block_size){

-		chat("xread %d, block=%d failed ...", dev->dev, addr);

-		errno = Eio;

-		return -1;

-	}

-	/*chat("xread ok...");*/

-	return 0;

-}

-void

-xwrite(Iobuf *p)

-{

-	Xfs *dev;

-	long addr;

-	dev = p->dev;

-	addr = p->addr;

-	/*chat("xwrite %d,%d...", dev->dev, addr);*/

-	seek(dev->dev, (vlong)addr*dev->block_size, 0);

-	if(write(dev->dev, p->iobuf, dev->block_size) != dev->block_size){

-		chat("xwrite %d, block=%d failed ...", dev->dev, addr);

-		errno = Eio;

-		return;

-	}

-	/*chat("xwrite ok...");*/

-}

-void

-dumpbuf(void)

-{

-	Iobuf *p;

-	for(p = iotail; p ; p = p->prev)

-		if( p->busy )

-			mchat("\nHi ERROR buf(%x, %d, %d)\n", p, p->addr, p->busy);

-}

--- a/sys/src/cmd/ext2srv/mkfile

+++ /dev/null

@@ -1,18 +1,0 @@

-</$objtype/mkfile

-TARG=ext2srv

-OFILES=\

-	xfssrv.$O\

-	xfile.$O\

-	ext2fs.$O\

-	ext2subs.$O\

-	chat.$O\

-	iobuf.$O\

-HFILES=dat.h\

-	fns.h\

-BIN=/$objtype/bin

-</sys/src/cmd/mkone

-xfssrv.$O:	errstr.h

--- a/sys/src/cmd/ext2srv/readme

+++ /dev/null

@@ -1,53 +1,0 @@

-Ext2srv Version 0.2

-----------------

-Ext2srv is a file server that interprets EXT2 file systems. Ext2srv is identical

-to dossrv in specification.

-I added just one option. By default ext2srv search for the first ext2 partition

-on the device (typically a disk) given by the mount spec option (see bind(1)).

-So, if you have different ext2 partitions on the same disk you can select one

-of them by adding the partition number at the end of the device in the mount

-system call. For example

-	mount -c /srv/ext2 /n/linux /dev/hd1disk:3

-forces the server to look for ext2 filesystem on the third partition of your second

-hard drive.

-WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING

-	Ext2srv uses some cache. So you must unmount the directory where you

-	mount your ext2 partition. It's the only way to synchronise dirty buffers

-	with the disk.

-	Don't reboot your terminal (^t^t r) without explicitly unmount.

-	Using something like this script is recommended :

-		#!/bin/rc

-		unmount /n/linux >[2] /dev/null

-		unmount /n/linux2 >[2] /dev/null

-		disk/kfscmd halt

-WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING

-I provide this software `as is' and without any warranty. Feed back are welcome !!!

[email protected]

-changes 5/17/2000 - threw away partition table

-walking, fixed name_len (it's a uchar not a ushort).

--rsc

-changes for 4th edition 13 May 2002 - [email protected]

- - adapted for 9P2000

- - added [-p passwd] [-g group] args as in tapefs(4)

- - create makes files with user and group of parent directory (not 100/200)

- - prevent writing to non-regular files

- - correct calculation of group descriptor block location when bsize!=1024

--- a/sys/src/cmd/ext2srv/version

+++ /dev/null

@@ -1,36 +1,0 @@

-# ext2srv

-# [bl]

-on trouve le numero de version sur les 2 premières lignes du

-fichier ext2subs.c.

-Version 0.1 :

-1)	il n'ya plus de copie d'inode

-	tous les iobuf utilisés dans un fonction

-	sont libérés. Un getbuf() => Un putbuf.

-2)	Tous dans les iobufs : super, group desc et bitmaps

-3)	Il n'ya plus aucune reférence au contenu d'une inode dans la

-	structure Xfile.

-4)	Choix de la parition en passant /dev/hd?disk:n lors du mount

-Version 0.11 :

-1)	-v affiche les blocks manipulés en lecture et écriture [18/10/96]

-2)	bug pour open avec TRUNC sur les liens... fixed [19/10/96]

-3)	maintenant on jette si la taille des blocks != 1024 dans ext2fs() [21/10/96]

-	(c'est quand même mieux pour le moment ...)

-Version 0.20 :

-1)	les blocks de 1024, 2048, 4096 octets sont supportés. [22/10/96]

-2)	le bug sur le qid.vers est détecté mais non corrigé...

\ No newline at end of file

--- a/sys/src/cmd/ext2srv/xfile.c

+++ /dev/null

@@ -1,161 +1,0 @@

-#include <u.h>

-#include <libc.h>

-#include <fcall.h>

-#include <thread.h>

-#include <9p.h>

-#include "dat.h"

-#include "fns.h"

-static Xfs	*xhead;

-static Xfile *freelist;

-static Lock	xlock, freelock;

-int	client;

-Xfs *

-getxfs(char *name)

-{

-	int fd;

-	Dir *dir;

-	Xfs *xf, *fxf;

-	if(name==0 || name[0]==0)

-		name = deffile;

-	if(name == 0){

-		errno = Enofilsys;

-		return 0;

-	}

-	fd = open(name, rdonly ? OREAD : ORDWR);

-	if(fd < 0){

-		errno = Enonexist;

-		return 0;

-	}

-	if((dir = dirfstat(fd)) == 0){

-		errno = Eio;

-		close(fd);

-		return 0;

-	}

-	lock(&xlock);

-	for(fxf=0, xf=xhead; xf; xf=xf->next){

-		if(xf->ref == 0){

-			if(fxf == 0)

-				fxf = xf;

-			continue;

-		}

-		if(xf->qid.path != dir->qid.path || xf->qid.vers != dir->qid.vers)

-			continue;

-		if(strcmp(xf->name, name) != 0 || xf->dev < 0)

-			continue;

-		chat("incref \"%s\", dev=%d...", xf->name, xf->dev);

-		++xf->ref;

-		unlock(&xlock);

-		close(fd);

-		free(dir);

-		return xf;

-	}

-	if(fxf==0){

-		fxf = malloc(sizeof(Xfs));

-		if(fxf==0){

-			unlock(&xlock);

-			close(fd);

-			free(dir);

-			errno = Enomem;

-			return 0;

-		}

-		fxf->next = xhead;

-		xhead = fxf;

-	}

-	chat("alloc \"%s\", dev=%d...", name, fd);

-	fxf->name = strdup(name);

-	fxf->ref = 1;

-	fxf->qid = dir->qid;

-	fxf->dev = fd;

-	fxf->fmt = 0;

-	fxf->ptr = 0;

-	free(dir);

-	if( ext2fs(fxf)<0 ){

-		xhead = fxf->next;

-		free(fxf);

-		unlock(&xlock);

-		return 0;

-	}

-	unlock(&xlock);

-	return fxf;

-}

-void

-refxfs(Xfs *xf, int delta)

-{

-	lock(&xlock);

-	xf->ref += delta;

-	if(xf->ref == 0){

-		/*mchat("free \"%s\", dev=%d...", xf->name, xf->dev);

-		dumpbuf();*/

-		CleanSuper(xf);

-		syncbuf();

-		free(xf->name);

-		purgebuf(xf);

-		if(xf->dev >= 0){

-			close(xf->dev);

-			xf->dev = -1;

-		}

-	}

-	unlock(&xlock);

-}

-Xfile *

-xfile(Fid *fid, int flag)

-{

-	Xfile *f;

-	f = (Xfile*)fid->aux;

-	switch(flag){

-	default:

-		panic("xfile");

-	case Asis:

-		return (f && f->xf && f->xf->dev < 0) ? 0 : f;

-	case Clean:

-		if (f) chat("Clean and fid->aux already exists\n");

-		break;

-	case Clunk:

-		if(f){

-			clean(f);

-			lock(&freelock);

-			f->next = freelist;

-			freelist = f;

-			unlock(&freelock);

-			fid->aux = 0;

-		}

-		return 0;

-	}

-	if(f)

-		return clean(f);

-	lock(&freelock);

-	if(f = freelist){	/* assign = */

-		freelist = f->next;

-		unlock(&freelock);

-	} else {

-		unlock(&freelock);

-		f = malloc(sizeof(Xfile));

-	}

-	fid->aux = f;

-	f->fid = fid->fid;

-	f->client = client;

-	f->xf = 0;

-	f->ptr = 0;

-	f->root = 0;

-	return f;

-}

-Xfile *

-clean(Xfile *f)

-{

-	if(f->xf && f->root){

-		refxfs(f->xf, -1);

-		f->xf = 0;

-	}

-	f->xf = 0;

-	f->root = 0;

-	f->dirindex = 0;

-	return f;

-}

--- a/sys/src/cmd/ext2srv/xfssrv.c

+++ /dev/null

@@ -1,91 +1,0 @@

-#include <u.h>

-#include <libc.h>

-#include <fcall.h>

-#include <thread.h>

-#include <9p.h>

-#include "dat.h"

-#include "fns.h"

-#include "errstr.h"

-int	errno;

-int rdonly;

-char	*srvfile;

-char	*deffile;

-extern void iobuf_init(void);

-extern Srv ext2srv;

-void

-usage(void)

-{

-	fprint(2, "usage: %s [-v] [-s] [-r] [-p passwd] [-g group] [-f devicefile] [srvname]\n", argv0);

-	exits("usage");

-}

-/*void handler(void *v, char *sig)

-{

-	USED(v,sig);

-	syncbuf();

-	noted(NDFLT);

-}*/

-void

-main(int argc, char **argv)

-{

-	int stdio;

-	stdio = 0;

-	ARGBEGIN{

-	case 'D':

-		++chatty9p;

-		break;

-	case 'v':

-		++chatty;

-		break;

-	case 'f':

-		deffile = ARGF();

-		break;

-	case 'g':

-		gidfile(ARGF());

-		break;

-	case 'p':

-		uidfile(ARGF());

-		break;

-	case 's':

-		stdio = 1;

-		break;

-	case 'r':

-		rdonly = 1;

-		break;

-	default:

-		usage();

-	}ARGEND

-	if(argc == 0)

-		srvfile = "ext2";

-	else if(argc == 1)

-		srvfile = argv[0];

-	else

-		usage();

-	iobuf_init();

-	/*notify(handler);*/

-	if(stdio){

-		srv(&ext2srv);

-	}else{

-		chat("%s %d: serving %s\n", argv0, getpid(), srvfile);

-		postmountsrv(&ext2srv, srvfile, 0, 0);

-	}

-	exits(0);

-}

-char *

-xerrstr(int e)

-{

-	if (e < 0 || e >= sizeof errmsg/sizeof errmsg[0])

-		return "no such error";

-	else

-		return errmsg[e];

-}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/LICENSE

@@ -1,0 +1,36 @@

+Files include/tree.h and include/queue.h include their own respective

+(BSD 4-clause) license, as well as ext4_hash.c (BSD 2-clause).

+The rest of the files are licensed under BSD 3-clause:

+Copyright (c) 2013-2017 Grzegorz Kostka ([email protected])

+Copyright (c) 2015-2017 Kaho Ng ([email protected])

+Copyright (c) 2020-2024 Sigrid Solveig Haflínudóttir ([email protected])

+HelenOS: Copyright (c) 2012 Martin Sucha

+         Copyright (c) 2012 Frantisek Princ

+All rights reserved.

+Redistribution and use in source and binary forms, with or without

+modification, are permitted provided that the following conditions

+are met:

+- Redistributions of source code must retain the above copyright

+  notice, this list of conditions and the following disclaimer.

+- Redistributions in binary form must reproduce the above copyright

+  notice, this list of conditions and the following disclaimer in the

+  documentation and/or other materials provided with the distribution.

+- The name of the author may not be used to endorse or promote products

+  derived from this software without specific prior written permission.

+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR

+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,

+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF

+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--- /dev/null

+++ b/sys/src/cmd/ext4srv/NOTES

@@ -1,0 +1,5 @@

+Based on https://github.com/gkostka/lwext4

+None of the sources are GPL-licensed:

+ * xattr handling removed altogether

+ * extents logic replaced with a BSD-3 licensed one found in https://github.com/ngkaho1234/lwext

--- /dev/null

+++ b/sys/src/cmd/ext4srv/common.h

@@ -1,0 +1,50 @@

+typedef struct Opts Opts;

+typedef struct Part Part;

+#pragma varargck type "Ð" Part*

+#pragma varargck type "M" Part*

+struct Opts {	/* options bundle passed to openpart(); per-field notes are inferred from names only */

+	char *group;

+	int cachewb;	/* presumably enables write-back caching -- TODO confirm */

+	int linkmode;	/* presumably Lhide or Lresolve -- TODO confirm */

+	int asroot;

+	int rdonly;	/* presumably non-zero requests a read-only mount -- TODO confirm */

+	int fstype;

+	int blksz;	/* NOTE(review): looks like a block size in bytes -- confirm */

+	int inodesz;	/* NOTE(review): looks like an inode size in bytes -- confirm */

+	u32int ninode;

+	char *label;

+};

+struct Part {	/* per-partition state: openpart() returns one, closepart() takes one */

+	Ref;	/* unnamed member (Plan 9 C): Ref's fields are reached directly through Part */

+	QLock;	/* unnamed member, likewise; presumably serializes use of this Part -- TODO confirm */

+	Part *prev, *next;	/* presumably links in a list of open partitions (cf. closeallparts) -- confirm */

+	char dev[32];

+	char mnt[32];	/* NOTE(review): looks like the mount point name handed to ext4_mount() -- confirm */

+	char *partdev;

+	struct ext4_blockdev bdev;

+	struct ext4_blockdev_iface bdif;

+	struct ext4_sblock *sb;

+	struct ext4_lock oslocks;	/* presumably the table given to ext4_mount_setup_locks() -- confirm */

+	Qid qid;

+	Qid qidmask;

+	Groups groups;

+	int f;

+	uchar blkbuf[];	/* flexible array member: any storage must be allocated past the end of the struct */

+};

+enum {	/* presumably the values of Opts.linkmode -- TODO confirm against users */

+	Lhide,	/* implicitly 0 */

+	Lresolve = 1,	/* explicit, but equal to what implicit numbering would give */

+};

+Part *openpart(char *dev, Opts *opts);

+void closepart(Part *p);

+void closeallparts(void);

+void statallparts(void);

+void syncallparts(void);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4.c

@@ -1,0 +1,2961 @@

+#include "ext4_config.h"

+#include "ext4.h"

+#include "ext4_trans.h"

+#include "ext4_fs.h"

+#include "ext4_dir.h"

+#include "ext4_inode.h"

+#include "ext4_super.h"

+#include "ext4_block_group.h"

+#include "ext4_dir_idx.h"

+#include "ext4_journal.h"

+char Eexists[] = "file exists";	/* shared error message strings; e.g. Eio is passed to werrstr() in ext4_link() */

+char Einval[] = "invalid operation";

+char Eio[] = "i/o error";

+char Enomem[] = "no memory";

+char Enospc[] = "no space";

+char Enotfound[] = "file not found";

+char Eperm[] = "permission denied";

+char Erdonlyfs[] = "read-only fs";

+/**@brief   Take the mount point's OS dependent lock by calling os_locks->lock(p_user); does nothing when os_locks is unset.*/

+#define EXT4_MP_LOCK(_m)                                               \

+	do {                                                               \

+		if ((_m)->os_locks)                                            \

+			(_m)->os_locks->lock((_m)->os_locks->p_user);              \

+	} while (0)

+/**@brief   Release the mount point's OS dependent lock by calling os_locks->unlock(p_user); does nothing when os_locks is unset.*/

+#define EXT4_MP_UNLOCK(_m)                                             \

+	do {                                                               \

+		if ((_m)->os_locks)                                            \

+			(_m)->os_locks->unlock((_m)->os_locks->p_user);            \

+	} while (0)

+/**@brief   Mount point descriptor; the element type of the s_mp table.*/

+struct ext4_mountpoint {

+	/**@brief   Slot-in-use flag: set by ext4_mount(), cleared by ext4_umount().*/

+	bool mounted;

+	/**@brief   Mount point name (@ref ext4_mount); ext4_mount() requires it to end with '/'.*/

+	char name[CONFIG_EXT4_MAX_MP_NAME + 1];

+	/**@brief   OS dependent lock/unlock functions; may be unset, see EXT4_MP_LOCK.*/

+	const struct ext4_lock *os_locks;

+	/**@brief   Ext4 filesystem internals, initialised by ext4_fs_init() in ext4_mount().*/

+	struct ext4_fs fs;

+	/**@brief   JBD fs, acquired in ext4_journal_start() and released in ext4_journal_stop().*/

+	struct jbd_fs jbd_fs;

+	/**@brief   Journal, started/stopped by ext4_journal_start()/ext4_journal_stop().*/

+	struct jbd_journal jbd_journal;

+	/**@brief   Block cache, bound to the block device in ext4_mount().*/

+	struct ext4_bcache bc;

+};

+/**@brief   Block devices descriptor; the element type of the s_bdevices table.*/

+struct ext4_block_devices {

+	/**@brief   Block device name; at most CONFIG_EXT4_MAX_BLOCKDEV_NAME characters plus the terminating NUL.*/

+	char name[CONFIG_EXT4_MAX_BLOCKDEV_NAME + 1];

+	/**@brief   Block device handle; a null handle marks a free slot (see ext4_device_register).*/

+	struct ext4_blockdev *bd;

+};

+/**@brief   Registered block devices; filled by ext4_device_register(), looked up by name in ext4_mount().*/

+static struct ext4_block_devices s_bdevices[CONFIG_EXT4_BLOCKDEVS_COUNT];

+/**@brief   Mountpoint table; ext4_mount() claims the first slot whose mounted flag is clear.*/

+static struct ext4_mountpoint s_mp[CONFIG_EXT4_MOUNTPOINTS_COUNT];

+/*

+ * Record block device bd under dev_name in the s_bdevices table.

+ * Returns 0 on success; returns -1 with the error string set when the

+ * name is too long, already registered, or no slot is free.

+ */

+int ext4_device_register(struct ext4_blockdev *bd,

+			 const char *dev_name)

+{

+	struct ext4_block_devices *slot, *vacant;

+	assert(bd && dev_name);

+	/* The name has to fit the fixed-size buffer filled by strcpy below. */

+	if (strlen(dev_name) > CONFIG_EXT4_MAX_BLOCKDEV_NAME) {

+		werrstr("dev name too long: %s", dev_name);

+		return -1;

+	}

+	/* Single pass: refuse duplicate names, remember the first free slot. */

+	vacant = nil;

+	for (slot = s_bdevices; slot < s_bdevices + CONFIG_EXT4_BLOCKDEVS_COUNT; slot++) {

+		if (strcmp(slot->name, dev_name) == 0) {

+			werrstr("dev already exists: %s", dev_name);

+			return -1;

+		}

+		if (vacant == nil && slot->bd == nil)

+			vacant = slot;

+	}

+	if (vacant == nil) {

+		werrstr("dev limit reached");

+		return -1;

+	}

+	strcpy(vacant->name, dev_name);

+	vacant->bd = bd;

+	return 0;

+}

+/*

+ * Forget the block device registered as dev_name.

+ * Returns 0 when a slot with that name was cleared, otherwise -1 with

+ * the error string set.

+ */

+int ext4_device_unregister(const char *dev_name)

+{

+	struct ext4_block_devices *slot;

+	usize i;

+	assert(dev_name);

+	for (i = 0; i < CONFIG_EXT4_BLOCKDEVS_COUNT; i++) {

+		slot = &s_bdevices[i];

+		if (strcmp(slot->name, dev_name) != 0)

+			continue;

+		/* Wipe both name and handle so the slot can be reused. */

+		memset(slot, 0, sizeof(*slot));

+		return 0;

+	}

+	werrstr("dev not found: %s", dev_name);

+	return -1;

+}

+/* Drop every registered block device by zeroing the whole table; always returns 0. */

+int ext4_device_unregister_all(void)

+{

+	memset(&s_bdevices[0], 0, sizeof s_bdevices);

+	return 0;

+}

+/* Report whether the name_size bytes at name spell exactly "." or "..". */

+static bool ext4_is_dots(const u8int *name, usize name_size)

+{

+	switch (name_size) {

+	case 1:

+		return name[0] == '.';

+	case 2:

+		return name[0] == '.' && name[1] == '.';

+	}

+	return false;

+}

+/*

+ * Set *has_children to whether enode is a directory holding at least one

+ * live entry other than "." and "..".  A non-directory never has children.

+ * Returns 0 on success; on an iterator failure returns that status and

+ * leaves *has_children untouched.

+ */

+static int ext4_has_children(bool *has_children, struct ext4_inode_ref *enode)

+{

+	struct ext4_sblock *sb = &enode->fs->sb;

+	struct ext4_dir_iter it;

+	bool seen;

+	u16int namelen;

+	int rc;

+	if (!ext4_inode_is_type(sb, enode->inode, EXT4_INODE_MODE_DIRECTORY)) {

+		*has_children = false;

+		return 0;

+	}

+	rc = ext4_dir_iterator_init(&it, enode, 0);

+	if (rc != 0)

+		return rc;

+	/* Walk the entries until one is in use and is not a dot entry. */

+	for (seen = false; it.curr != nil; ) {

+		if (ext4_dir_en_get_inode(it.curr) != 0) {

+			namelen = ext4_dir_en_get_name_len(sb, it.curr);

+			if (!ext4_is_dots(it.curr->name, namelen)) {

+				seen = true;

+				break;

+			}

+		}

+		rc = ext4_dir_iterator_next(&it);

+		if (rc != 0) {

+			/* Release the iterator; the walk error takes precedence. */

+			ext4_dir_iterator_fini(&it);

+			return rc;

+		}

+	}

+	rc = ext4_dir_iterator_fini(&it);

+	if (rc != 0)

+		return rc;

+	*has_children = seen;

+	return 0;

+}

+/*

+ * Link inode ch into directory parent under the first len bytes of n.

+ *

+ * rename == false: a fresh link.  A directory is additionally set up as

+ * an empty directory (index root, or explicit "." and ".." entries) and

+ * gets a link count of 2; any other inode has its link count bumped.

+ * rename == true: ch already exists; for a directory only its ".."

+ * reference is redirected to the new parent.

+ *

+ * n need not be NUL-terminated at len, so rollback removes the entry with

+ * the same len it was added with.

+ * Returns 0 on success, non-zero on failure.

+ */

+static int ext4_link(struct ext4_mountpoint *mp, struct ext4_inode_ref *parent,

+		     struct ext4_inode_ref *ch, const char *n,

+		     u32int len, bool rename)

+{

+	/* Check maximum name length */

+	if (len > EXT4_DIRECTORY_FILENAME_LEN) {

+		werrstr("entry name too long: %s", n);

+		return -1;

+	}

+	/* Add entry to parent directory */

+	int r = ext4_dir_add_entry(parent, n, len, ch);

+	if (r != 0)

+		return r;

+	/* Fill new dir -> add '.' and '..' entries.

+	 * Also newly allocated inode should have 0 link count.

+	 */

+	bool is_dir = ext4_inode_is_type(&mp->fs.sb, ch->inode,

+			       EXT4_INODE_MODE_DIRECTORY);

+	if (is_dir && !rename) {

+		if (ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_DIR_INDEX)) {

+			/*

+			 * Initialize directory index if supported.

+			 * NOTE(review): the index root is expected to carry the

+			 * dot entries already, so none are added here -- confirm

+			 * against ext4_dir_dx_init().

+			 */

+			r = ext4_dir_dx_init(ch, parent);

+			if (r != 0)

+				return r;

+			ext4_inode_set_flag(ch->inode, EXT4_INODE_FLAG_INDEX);

+			ch->dirty = true;

+		} else {

+			/* Linear directory: both dot entries are added by hand. */

+			r = ext4_dir_add_entry(ch, ".", strlen("."), ch);

+			if (r != 0) {

+				ext4_dir_remove_entry(parent, n, len);

+				return r;

+			}

+			r = ext4_dir_add_entry(ch, "..", strlen(".."), parent);

+			if (r != 0) {

+				ext4_dir_remove_entry(parent, n, len);

+				ext4_dir_remove_entry(ch, ".", strlen("."));

+				return r;

+			}

+		}

+		/*New empty directory. Two links (. and ..) */

+		ext4_inode_set_links_cnt(ch->inode, 2);

+		ext4_fs_inode_links_count_inc(parent);

+		ch->dirty = true;

+		parent->dirty = true;

+		return r;

+	}

+	/*

+	 * In case we want to rename a directory,

+	 * we reset the original '..' pointer.

+	 */

+	if (is_dir) {

+		bool idx;

+		idx = ext4_inode_has_flag(ch->inode, EXT4_INODE_FLAG_INDEX);

+		struct ext4_dir_search_result res;

+		if (!idx) {

+			r = ext4_dir_find_entry(&res, ch, "..", strlen(".."));

+			if (r != 0) {

+				werrstr(Eio);

+				return -1;

+			}

+			ext4_dir_en_set_inode(res.dentry, parent->index);

+			ext4_trans_set_block_dirty(res.block.buf);

+			r = ext4_dir_destroy_result(ch, &res);

+			if (r != 0)

+				return r;

+		} else {

+			r = ext4_dir_dx_reset_parent_inode(ch, parent->index);

+			if (r != 0)

+				return r;

+		}

+		/* The moved directory's ".." now references the new parent. */

+		ext4_fs_inode_links_count_inc(parent);

+		parent->dirty = true;

+	}

+	if (!rename) {

+		ext4_fs_inode_links_count_inc(ch);

+		ch->dirty = true;

+	}

+	return r;

+}

+/*

+ * Remove the entry name (name_len bytes) that refers to child from the

+ * directory parent and drop the matching link counts.

+ * Refuses with -1 when child still has entries other than "." and "..".

+ * Returns 0 on success, otherwise the status of the failing helper.

+ */

+static int ext4_unlink(struct ext4_mountpoint *mp,

+		       struct ext4_inode_ref *parent,

+		       struct ext4_inode_ref *child, const char *name,

+		       u32int name_len)

+{

+	bool nonempty;

+	int err;

+	err = ext4_has_children(&nonempty, child);

+	if (err != 0)

+		return err;

+	if (nonempty) {

+		/* Cannot unlink non-empty node */

+		werrstr("remove -- directory not empty");

+		return -1;

+	}

+	err = ext4_dir_remove_entry(parent, name, name_len);

+	if (err != 0)

+		return err;

+	/* A removed directory no longer contributes a link to its parent. */

+	if (ext4_inode_is_type(&mp->fs.sb, child->inode,

+			       EXT4_INODE_MODE_DIRECTORY)) {

+		ext4_fs_inode_links_count_dec(parent);

+		parent->dirty = true;

+	}

+	/*

+	 * TODO: once wall-clock time is available, update the change and

+	 * modification times of parent and the change time of child here.

+	 */

+	if (ext4_inode_get_links_cnt(child->inode) != 0) {

+		ext4_fs_inode_links_count_dec(child);

+		child->dirty = true;

+	}

+	return 0;

+}

+/*

+ * Mount the registered block device dev_name at mount_point.

+ *

+ * mount_point must be non-empty, end with '/' and be at most

+ * CONFIG_EXT4_MAX_MP_NAME characters long.  Mounting a name that is

+ * already mounted is a no-op that returns 0.

+ *

+ * Returns 0 on success.  On failure returns non-zero (-1 with the error

+ * string set for the checks made here, otherwise the status of the

+ * failing helper) and releases the mount point slot it had claimed, so

+ * a failed mount does not use up table entries.

+ */

+int ext4_mount(const char *dev_name, const char *mount_point,

+	       bool read_only)

+{

+	int r;

+	u32int bsize;

+	usize i;

+	usize mp_len;

+	struct ext4_bcache *bc;

+	struct ext4_blockdev *bd = 0;

+	struct ext4_mountpoint *mp = 0;

+	assert(mount_point && dev_name);

+	mp_len = strlen(mount_point);

+	if (mp_len > CONFIG_EXT4_MAX_MP_NAME) {

+		werrstr("mount point name too long: %s", mount_point);

+		return -1;

+	}

+	/* An empty name has no last character; reject it before indexing. */

+	if (mp_len == 0 || mount_point[mp_len - 1] != '/') {

+		werrstr("invalid mount point: %s", mount_point);

+		return -1;

+	}

+	for (i = 0; i < CONFIG_EXT4_BLOCKDEVS_COUNT; ++i) {

+		if (!strcmp(dev_name, s_bdevices[i].name)) {

+			bd = s_bdevices[i].bd;

+			break;

+		}

+	}

+	if (!bd) {

+		werrstr("dev not found: %s", dev_name);

+		return -1;

+	}

+	/*

+	 * Look at every slot in use before claiming a free one; a free slot

+	 * may precede the slot that already holds this name.

+	 */

+	for (i = 0; i < CONFIG_EXT4_MOUNTPOINTS_COUNT; ++i) {

+		if (s_mp[i].mounted && !strcmp(s_mp[i].name, mount_point))

+			return 0;

+	}

+	for (i = 0; i < CONFIG_EXT4_MOUNTPOINTS_COUNT; ++i) {

+		if (!s_mp[i].mounted) {

+			strcpy(s_mp[i].name, mount_point);

+			s_mp[i].mounted = 1;

+			mp = &s_mp[i];

+			break;

+		}

+	}

+	if (!mp) {

+		werrstr("memory");

+		return -1;

+	}

+	r = ext4_block_init(bd);

+	if (r != 0)

+		goto Fail;

+	r = ext4_fs_init(&mp->fs, bd, read_only);

+	if (r != 0) {

+		ext4_block_fini(bd);

+		goto Fail;

+	}

+	bsize = ext4_sb_get_block_size(&mp->fs.sb);

+	ext4_block_set_lb_size(bd, bsize);

+	bc = &mp->bc;

+	r = ext4_bcache_init_dynamic(bc, CONFIG_BLOCK_DEV_CACHE_SIZE, bsize);

+	if (r != 0) {

+		ext4_block_fini(bd);

+		goto Fail;

+	}

+	if (bsize != bc->itemsize) {

+		/* Same teardown as a failed bind: cache and device are both live here. */

+		ext4_bcache_cleanup(bc);

+		ext4_block_fini(bd);

+		ext4_bcache_fini_dynamic(bc);

+		werrstr("unsupported block size: %d", bsize);

+		r = -1;

+		goto Fail;

+	}

+	/*Bind block cache to block device*/

+	r = ext4_block_bind_bcache(bd, bc);

+	if (r != 0) {

+		ext4_bcache_cleanup(bc);

+		ext4_block_fini(bd);

+		ext4_bcache_fini_dynamic(bc);

+		goto Fail;

+	}

+	bd->fs = &mp->fs;

+	return r;

+Fail:

+	/* Give the slot back so a later mount can reuse it. */

+	mp->mounted = 0;

+	return r;

+}

+/*

+ * Return the first mounted slot whose name is a prefix of path, or nil

+ * with the error string set when none matches.

+ */

+static struct ext4_mountpoint *ext4_get_mount(const char *path)

+{

+	struct ext4_mountpoint *mp;

+	for (mp = s_mp; mp < s_mp + CONFIG_EXT4_MOUNTPOINTS_COUNT; mp++) {

+		if (mp->mounted && strncmp(mp->name, path, strlen(mp->name)) == 0)

+			return mp;

+	}

+	werrstr("mount point not found: %s", path);

+	return nil;

+}

+/*

+ * Unmount mount_point: finish the filesystem, then tear down the block

+ * cache and the block device.  The device's back pointer to the

+ * filesystem is cleared even when ext4_fs_fini() fails; in that case the

+ * slot stays marked as mounted.

+ * Returns 0 on success, -1 for an unknown mount point, otherwise the

+ * status of the failing step.

+ */

+int ext4_umount(const char *mount_point)

+{

+	struct ext4_mountpoint *mp;

+	int r;

+	mp = ext4_get_mount(mount_point);

+	if (mp == nil)

+		return -1;

+	r = ext4_fs_fini(&mp->fs);

+	if (r == 0) {

+		mp->mounted = 0;

+		ext4_bcache_cleanup(mp->fs.bdev->bc);

+		ext4_bcache_fini_dynamic(mp->fs.bdev->bc);

+		r = ext4_block_fini(mp->fs.bdev);

+	}

+	mp->fs.bdev->fs = nil;

+	return r;

+}

+/*

+ * Start journalling on mount_point.  Does nothing (returns 0) for a

+ * read-only mount or a filesystem without the journal feature.

+ * Returns 0 on success, -1 for an unknown mount point, otherwise the

+ * status of the failing jbd call.

+ */

+int ext4_journal_start(const char *mount_point)

+{

+	struct ext4_mountpoint *mp;

+	int r;

+	mp = ext4_get_mount(mount_point);

+	if (mp == nil)

+		return -1;

+	if (mp->fs.read_only ||

+	    !ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_HAS_JOURNAL))

+		return 0;

+	r = jbd_get_fs(&mp->fs, &mp->jbd_fs);

+	if (r != 0)

+		return r;

+	r = jbd_journal_start(&mp->jbd_fs, &mp->jbd_journal);

+	if (r != 0) {

+		/* Clear the dirty flag before handing the jbd fs back. */

+		mp->jbd_fs.dirty = false;

+		jbd_put_fs(&mp->jbd_fs);

+		return r;

+	}

+	/* Publish the journal only once it is fully started. */

+	mp->fs.jbd_fs = &mp->jbd_fs;

+	mp->fs.jbd_journal = &mp->jbd_journal;

+	return 0;

+}

+/*

+ * Stop journalling on mount_point.  Does nothing (returns 0) for a

+ * read-only mount or a filesystem without the journal feature.

+ * The filesystem's journal pointers are cleared whether or not the stop

+ * succeeds.  Returns 0 on success, -1 for an unknown mount point,

+ * otherwise the status of the failing jbd call.

+ */

+int ext4_journal_stop(const char *mount_point)

+{

+	struct ext4_mountpoint *mp;

+	int r;

+	mp = ext4_get_mount(mount_point);

+	if (mp == nil)

+		return -1;

+	if (mp->fs.read_only ||

+	    !ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_HAS_JOURNAL))

+		return 0;

+	r = jbd_journal_stop(&mp->jbd_journal);

+	if (r != 0) {

+		/* Stop failed: release the jbd fs with its dirty flag cleared, keep the stop status. */

+		mp->jbd_fs.dirty = false;

+		jbd_put_fs(&mp->jbd_fs);

+	} else {

+		r = jbd_put_fs(&mp->jbd_fs);

+	}

+	mp->fs.jbd_journal = nil;

+	mp->fs.jbd_fs = nil;

+	return r;

+}

+/*

+ * Replay the journal of mount_point, if the filesystem has one.

+ * After a successful replay on a writable mount the superblock's free

+ * block and free inode counters are recomputed from the block groups.

+ *

+ * The mount point lock is held for the whole operation and is released

+ * on every exit path after it has been taken.

+ * Returns 0 on success or when there is no journal, -1 for an unknown

+ * mount point or allocation failure, otherwise the failing status.

+ */

+int ext4_recover(const char *mount_point)

+{

+	struct ext4_mountpoint *mp = ext4_get_mount(mount_point);

+	struct jbd_fs *jbd_fs;

+	int r = 0;

+	if (!mp)

+		return -1;

+	EXT4_MP_LOCK(mp);

+	/* No journal: nothing to replay, but the lock must still be dropped. */

+	if (!ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_HAS_JOURNAL))

+		goto Finish;

+	jbd_fs = ext4_calloc(1, sizeof(struct jbd_fs));

+	if (!jbd_fs) {

+		werrstr("memory");

+		r = -1;

+		goto Finish;

+	}

+	r = jbd_get_fs(&mp->fs, jbd_fs);

+	if (r != 0) {

+		ext4_free(jbd_fs);

+		goto Finish;

+	}

+	r = jbd_recover(jbd_fs);

+	jbd_put_fs(jbd_fs);

+	ext4_free(jbd_fs);

+	if (r == 0 && !mp->fs.read_only) {

+		u32int bgid;

+		u64int free_blocks_count = 0;

+		u32int free_inodes_count = 0;

+		struct ext4_block_group_ref bg_ref;

+		/* Update superblock's stats */

+		for (bgid = 0;bgid < ext4_block_group_cnt(&mp->fs.sb);bgid++) {

+			r = ext4_fs_get_block_group_ref(&mp->fs, bgid, &bg_ref);

+			if (r != 0)

+				goto Finish;

+			free_blocks_count +=

+				ext4_bg_get_free_blocks_count(bg_ref.block_group,

+						&mp->fs.sb);

+			free_inodes_count +=

+				ext4_bg_get_free_inodes_count(bg_ref.block_group,

+						&mp->fs.sb);

+			ext4_fs_put_block_group_ref(&bg_ref);

+		}

+		ext4_sb_set_free_blocks_cnt(&mp->fs.sb, free_blocks_count);

+		ext4_set32(&mp->fs.sb, free_inodes_count, free_inodes_count);

+		/* We don't need to save the superblock stats immediately. */

+	}

+Finish:

+	EXT4_MP_UNLOCK(mp);

+	return r;

+}

+/*

+ * Open a journal transaction for mp when a journal is attached and no

+ * transaction is current; otherwise do nothing.

+ * Returns 0, or -1 with the error string set when no transaction could

+ * be created.

+ */

+int ext4_trans_start(struct ext4_mountpoint *mp)

+{

+	struct jbd_trans *fresh;

+	if (mp->fs.jbd_journal == nil || mp->fs.curr_trans != nil)

+		return 0;

+	fresh = jbd_journal_new_trans(mp->fs.jbd_journal);

+	if (fresh == nil) {

+		werrstr("memory");

+		return -1;

+	}

+	mp->fs.curr_trans = fresh;

+	return 0;

+}

+/*

+ * Commit the current transaction of mp, if a journal is attached and a

+ * transaction is open, and clear the current-transaction pointer.

+ * Returns the commit status, or 0 when there was nothing to commit.

+ */

+int ext4_trans_stop(struct ext4_mountpoint *mp)

+{

+	struct jbd_journal *jnl = mp->fs.jbd_journal;

+	int status;

+	if (jnl == nil || mp->fs.curr_trans == nil)

+		return 0;

+	status = jbd_journal_commit_trans(jnl, mp->fs.curr_trans);

+	mp->fs.curr_trans = nil;

+	return status;

+}

+/*

+ * Discard the current transaction of mp, if a journal is attached and a

+ * transaction is open, and clear the current-transaction pointer.

+ */

+void ext4_trans_abort(struct ext4_mountpoint *mp)

+{

+	struct jbd_journal *jnl = mp->fs.jbd_journal;

+	if (jnl == nil || mp->fs.curr_trans == nil)

+		return;

+	jbd_journal_free_trans(jnl, mp->fs.curr_trans, true);

+	mp->fs.curr_trans = nil;

+}

+/*

+ * Copy superblock counters and the 16-byte volume name of mount_point

+ * into *stats while holding the mount point lock.

+ * Returns 0, or -1 when the mount point is unknown.

+ */

+int ext4_mount_point_stats(const char *mount_point,

+			   struct ext4_mount_stats *stats)

+{

+	struct ext4_mountpoint *mp;

+	struct ext4_sblock *sb;

+	mp = ext4_get_mount(mount_point);

+	if (mp == nil)

+		return -1;

+	sb = &mp->fs.sb;

+	EXT4_MP_LOCK(mp);

+	/* inode figures */

+	stats->inodes_count = ext4_get32(sb, inodes_count);

+	stats->free_inodes_count = ext4_get32(sb, free_inodes_count);

+	/* block figures */

+	stats->blocks_count = ext4_sb_get_blocks_cnt(sb);

+	stats->free_blocks_count = ext4_sb_get_free_blocks_cnt(sb);

+	stats->block_size = ext4_sb_get_block_size(sb);

+	/* block group layout */

+	stats->block_group_count = ext4_block_group_cnt(sb);

+	stats->blocks_per_group = ext4_get32(sb, blocks_per_group);

+	stats->inodes_per_group = ext4_get32(sb, inodes_per_group);

+	memcpy(stats->volume_name, sb->volume_name, 16);

+	EXT4_MP_UNLOCK(mp);

+	return 0;

+}

+/*

+ * Install locks as the lock/unlock callbacks of the slot whose name is

+ * exactly mount_point.  The slot's mounted flag is not consulted.

+ * Returns 0, or -1 with the error string set when no slot has that name.

+ */

+int ext4_mount_setup_locks(const char *mount_point,

+			   const struct ext4_lock *locks)

+{

+	struct ext4_mountpoint *slot;

+	for (slot = s_mp; slot < s_mp + CONFIG_EXT4_MOUNTPOINTS_COUNT; slot++) {

+		if (strcmp(slot->name, mount_point) == 0) {

+			slot->os_locks = locks;

+			return 0;

+		}

+	}

+	werrstr("mount point not found: %s", mount_point);

+	return -1;

+}

+/********************************FILE OPERATIONS*****************************/

+/*

+ * Measure the leading component of path.

+ * Returns the number of bytes before the first '/' or the terminating

+ * NUL; *is_goal is set to true when the component is ended by NUL (it is

+ * the last one) and to false when it is ended by '/'.

+ * Returns 0 for an empty component, and also when the component is

+ * longer than EXT4_DIRECTORY_FILENAME_LEN bytes; in that case *is_goal

+ * is set to false.

+ */

+static int ext4_path_check(const char *path, bool *is_goal)

+{

+	int i;

+	/*

+	 * Scan one position past the maximum name length: a name of exactly

+	 * EXT4_DIRECTORY_FILENAME_LEN bytes has its terminator at that index.

+	 */

+	for (i = 0; i <= EXT4_DIRECTORY_FILENAME_LEN; ++i) {

+		if (path[i] == '/') {

+			*is_goal = false;

+			return i;

+		}

+		if (path[i] == 0) {

+			*is_goal = true;

+			return i;

+		}

+	}

+	/* Component too long: never report it as the goal. */

+	*is_goal = false;

+	return 0;

+}

+/*

+ * Translate an fopen-style mode string into open flags.

+ * On a recognised mode stores the flags in *file_flags and returns true;

+ * returns false for nil or an unrecognised string, leaving *file_flags

+ * untouched.

+ */

+static bool ext4_parse_flags(const char *flags, u32int *file_flags)

+{

+	static const struct {

+		const char *mode;

+		u32int bits;

+	} modes[] = {

+		{ "r",   O_RDONLY },

+		{ "rb",  O_RDONLY },

+		{ "w",   O_WRONLY | O_CREAT | O_TRUNC },

+		{ "wb",  O_WRONLY | O_CREAT | O_TRUNC },

+		{ "a",   O_WRONLY | O_CREAT | O_APPEND },

+		{ "ab",  O_WRONLY | O_CREAT | O_APPEND },

+		{ "r+",  O_RDWR },

+		{ "rb+", O_RDWR },

+		{ "r+b", O_RDWR },

+		{ "w+",  O_RDWR | O_CREAT | O_TRUNC },

+		{ "wb+", O_RDWR | O_CREAT | O_TRUNC },

+		{ "w+b", O_RDWR | O_CREAT | O_TRUNC },

+		{ "a+",  O_RDWR | O_CREAT | O_APPEND },

+		{ "ab+", O_RDWR | O_CREAT | O_APPEND },

+		{ "a+b", O_RDWR | O_CREAT | O_APPEND },

+	};

+	usize k;

+	if (flags == nil)

+		return false;

+	/* All mode strings are distinct, so lookup order does not matter. */

+	for (k = 0; k < sizeof(modes) / sizeof(modes[0]); k++) {

+		if (strcmp(flags, modes[k].mode) == 0) {

+			*file_flags = modes[k].bits;

+			return true;

+		}

+	}

+	return false;

+}

+/*
+ * Truncate inode index to size inside a transaction of its own:
+ * the transaction is stopped on success and aborted on any failure.
+ */
+static int ext4_trunc_inode_step(struct ext4_mountpoint *mp,
+				 u32int index, u64int size)
+{
+	struct ext4_inode_ref inode_ref;
+	int r;
+	ext4_trans_start(mp);
+	r = ext4_fs_get_inode_ref(&mp->fs, index, &inode_ref);
+	if (r != 0) {
+		ext4_trans_abort(mp);
+		return r;
+	}
+	r = ext4_fs_truncate_inode(&inode_ref, size);
+	if (r != 0)
+		/* keep the truncate error; the ref is still released */
+		ext4_fs_put_inode_ref(&inode_ref);
+	else
+		r = ext4_fs_put_inode_ref(&inode_ref);
+	if (r != 0)
+		ext4_trans_abort(mp);
+	else
+		ext4_trans_stop(mp);
+	return r;
+}
+/*
+ * Shrink inode index down to new_size.
+ *
+ * The work is split into steps of at most CONFIG_MAX_TRUNCATE_SIZE
+ * bytes, each in its own transaction. A transaction that is current on
+ * entry is stopped first and a new one is started before returning, so
+ * the caller still has a running transaction afterwards.
+ *
+ * Returns 0 on success, otherwise the error of the first failing step.
+ * A failed step ends the whole operation; previously a failure to load
+ * the inode inside the loop fell through to the final step, which could
+ * overwrite the error.
+ */
+static int ext4_trunc_inode(struct ext4_mountpoint *mp,
+			    u32int index, u64int new_size)
+{
+	int r;
+	struct ext4_fs *const fs = &mp->fs;
+	struct ext4_inode_ref inode_ref;
+	u64int inode_size;
+	bool has_trans = mp->fs.jbd_journal && mp->fs.curr_trans;
+	r = ext4_fs_get_inode_ref(fs, index, &inode_ref);
+	if (r != 0)
+		return r;
+	inode_size = ext4_inode_get_size(&fs->sb, inode_ref.inode);
+	ext4_fs_put_inode_ref(&inode_ref);
+	if (has_trans)
+		ext4_trans_stop(mp);
+	/* Peel off full-sized chunks while more than one chunk remains. */
+	while (inode_size > new_size + CONFIG_MAX_TRUNCATE_SIZE) {
+		inode_size -= CONFIG_MAX_TRUNCATE_SIZE;
+		r = ext4_trunc_inode_step(mp, index, inode_size);
+		if (r != 0)
+			goto Finish;
+	}
+	/* Remaining partial chunk, if any. */
+	if (inode_size > new_size)
+		r = ext4_trunc_inode_step(mp, index, new_size);
+Finish:
+	if (has_trans)
+		ext4_trans_start(mp);
+	return r;
+}

+/*
+ * Empty the directory dir (whose parent directory is parent).
+ *
+ * With the DIR_INDEX feature the directory index is re-initialised and
+ * the inode is cut to EXT4_DIR_DX_INIT_BCNT blocks, otherwise to one
+ * block; afterwards the inode is truncated to zero length.
+ * Returns -1 with "not a directory" when dir is not a directory,
+ * otherwise the result of the first failing step or of the final
+ * truncation.
+ */
+static int ext4_trunc_dir(struct ext4_mountpoint *mp,
+			  struct ext4_inode_ref *parent,
+			  struct ext4_inode_ref *dir)
+{
+	struct ext4_sblock *sb = &mp->fs.sb;
+	bool is_dir = ext4_inode_is_type(sb, dir->inode,
+			EXT4_INODE_MODE_DIRECTORY);
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u32int keep = block_size;
+	int r;
+	if (!is_dir) {
+		werrstr("not a directory");
+		return -1;
+	}
+	/* Initialize directory index if supported */
+	if (ext4_sb_feature_com(sb, EXT4_FCOM_DIR_INDEX)) {
+		r = ext4_dir_dx_init(dir, parent);
+		if (r != 0)
+			return r;
+		keep = EXT4_DIR_DX_INIT_BCNT * block_size;
+	}
+	r = ext4_trunc_inode(mp, dir->index, keep);
+	if (r != 0)
+		return r;
+	return ext4_fs_truncate_inode(dir, 0);
+}

+/*
+ * Resolve path (which begins with a mount point name) one component at
+ * a time, starting at the root inode, and describe the final component
+ * in f.
+ *
+ * flags are O_* open flags. With O_CREAT, missing components are
+ * allocated and linked in: intermediate ones as directories, the final
+ * one as ftype. With O_TRUNC a regular file is truncated to zero
+ * length; with O_APPEND the initial position is the file size.
+ *
+ * parent_inode, when not nil, receives the index of the directory the
+ * last entry was found in. name_off, when not nil, receives the offset
+ * within the original path of the last component examined.
+ *
+ * f->mp is cleared on entry and set once the final component has been
+ * found. Returns 0 on success.
+ *
+ * NOTICE: if ftype is equal to EXT4_DE_UNKNOWN,
+ * any filetype of the target dir entry will be accepted.
+ */
+static int ext4_generic_open2(ext4_file *f, const char *path, int flags,
+			      int ftype, u32int *parent_inode,
+			      u32int *name_off)
+{
+	bool is_goal = false;
+	u32int imode = EXT4_INODE_MODE_DIRECTORY;
+	u32int next_inode;
+	int r;
+	int len;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_dir_search_result result;
+	struct ext4_inode_ref ref;
+	f->mp = 0;
+	if (!mp)
+		return -1;
+	struct ext4_fs *const fs = &mp->fs;
+	struct ext4_sblock *const sb = &mp->fs.sb;
+	if (fs->read_only && flags & O_CREAT) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	f->flags = flags;
+	/*Skip mount point*/
+	path += strlen(mp->name);
+	if (name_off)
+		*name_off = strlen(mp->name);
+	/*Load root*/
+	r = ext4_fs_get_inode_ref(fs, EXT4_INODE_ROOT_INDEX, &ref);
+	if (r != 0)
+		return r;
+	if (parent_inode)
+		*parent_inode = ref.index;
+	while (1) {
+		len = ext4_path_check(path, &is_goal);
+		if (!len) {
+			/*If root open was request.*/
+			if (ftype == EXT4_DE_DIR || ftype == EXT4_DE_UNKNOWN)
+				if (is_goal)
+					break;
+Notfound:
+			werrstr(Enotfound);
+			r = -1;
+			break;
+		}
+		r = ext4_dir_find_entry(&result, &ref, path, len);
+		if (r != 0) {
+			/*Destroy last result*/
+			ext4_dir_destroy_result(&ref, &result);
+			if (r != EXT4_ERR_NOT_FOUND)
+				break;
+			if (!(f->flags & O_CREAT))
+				break;
+			/*O_CREAT allows create new entry*/
+			struct ext4_inode_ref child_ref;
+			r = ext4_fs_alloc_inode(fs, &child_ref,
+					is_goal ? ftype : EXT4_DE_DIR);
+			if (r != 0)
+				break;
+			ext4_fs_inode_blocks_init(fs, &child_ref);
+			/*Link with root dir.*/
+			r = ext4_link(mp, &ref, &child_ref, path, len, false);
+			if (r != 0) {
+				/*Fail. Free new inode.*/
+				ext4_fs_free_inode(&child_ref);
+				/*We do not want to write new inode.
+				  But block has to be released.*/
+				child_ref.dirty = false;
+				ext4_fs_put_inode_ref(&child_ref);
+				break;
+			}
+			ext4_fs_put_inode_ref(&child_ref);
+			/* Look the same component up again, now that it exists. */
+			continue;
+		}
+		if (parent_inode)
+			*parent_inode = ref.index;
+		next_inode = ext4_dir_en_get_inode(result.dentry);
+		if (ext4_sb_feature_incom(sb, EXT4_FINCOM_FILETYPE)) {
+			/* The entry itself records the file type. */
+			u8int t;
+			t = ext4_dir_en_get_inode_type(sb, result.dentry);
+			imode = ext4_fs_correspond_inode_mode(t);
+		} else {
+			/* No type in the entry: read it from the inode. */
+			struct ext4_inode_ref child_ref;
+			r = ext4_fs_get_inode_ref(fs, next_inode, &child_ref);
+			if (r != 0) {
+				/* The search result is still held; release
+				 * it before leaving the loop. */
+				ext4_dir_destroy_result(&ref, &result);
+				break;
+			}
+			imode = ext4_inode_type(sb, child_ref.inode);
+			ext4_fs_put_inode_ref(&child_ref);
+		}
+		r = ext4_dir_destroy_result(&ref, &result);
+		if (r != 0)
+			break;
+		/*If expected file error*/
+		if (imode != EXT4_INODE_MODE_DIRECTORY && !is_goal)
+			goto Notfound;
+		if (ftype != EXT4_DE_UNKNOWN) {
+			bool df = imode != ext4_fs_correspond_inode_mode(ftype);
+			if (df && is_goal)
+				goto Notfound;
+		}
+		/* Step down: swap the directory ref for the child's. */
+		r = ext4_fs_put_inode_ref(&ref);
+		if (r != 0)
+			break;
+		r = ext4_fs_get_inode_ref(fs, next_inode, &ref);
+		if (r != 0)
+			break;
+		if (is_goal)
+			break;
+		path += len + 1;
+		if (name_off)
+			*name_off += len + 1;
+	}
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&ref);
+		return r;
+	}
+	if (is_goal) {
+		if ((f->flags & O_TRUNC) && (imode == EXT4_INODE_MODE_FILE)) {
+			r = ext4_trunc_inode(mp, ref.index, 0);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&ref);
+				return r;
+			}
+		}
+		f->mp = mp;
+		f->fsize = ext4_inode_get_size(sb, ref.inode);
+		f->inode = ref.index;
+		f->fpos = 0;
+		if (f->flags & O_APPEND)
+			f->fpos = f->fsize;
+	}
+	return ext4_fs_put_inode_ref(&ref);
+}

+/****************************************************************************/

+/*
+ * Open path using an fopen-style mode string.
+ *
+ * flags is parsed by ext4_parse_flags(). file_expect selects whether
+ * the final component must be a regular file (true) or a directory
+ * (false). parent_inode and name_off are passed through to
+ * ext4_generic_open2().
+ *
+ * When the mode implies O_CREAT the lookup runs inside a transaction
+ * that is stopped on success and aborted on failure.
+ * Returns 0 on success; -1 for an unrecognised mode string or an
+ * unknown mount point.
+ */
+static int ext4_generic_open(ext4_file *f, const char *path, const char *flags,
+			     bool file_expect, u32int *parent_inode,
+			     u32int *name_off)
+{
+	u32int iflags;
+	int filetype;
+	int r;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	if (ext4_parse_flags(flags, &iflags) == false)
+		return -1;
+	/* mp is handed to ext4_trans_start() below, so reject an unknown
+	 * mount point here; f->mp is cleared as ext4_generic_open2() would. */
+	if (!mp) {
+		f->mp = 0;
+		return -1;
+	}
+	if (file_expect == true)
+		filetype = EXT4_DE_REG_FILE;
+	else
+		filetype = EXT4_DE_DIR;
+	if (iflags & O_CREAT)
+		ext4_trans_start(mp);
+	r = ext4_generic_open2(f, path, iflags, filetype, parent_inode, name_off);
+	if (iflags & O_CREAT) {
+		if (r == 0)
+			ext4_trans_stop(mp);
+		else
+			ext4_trans_abort(mp);
+	}
+	return r;
+}

+/*
+ * Create a directory entry named by path that refers to child_ref.
+ *
+ * path is walked from the mount point root; every component but the
+ * last must be an existing directory. The last component must not
+ * exist yet and is linked to child_ref through ext4_link(), to which
+ * rename is passed unchanged.
+ *
+ * Returns 0 on success; -1 with Eexists when the target name is taken
+ * and with Enotfound when the path cannot be followed.
+ */
+static int ext4_create_hardlink(const char *path,
+		struct ext4_inode_ref *child_ref, bool rename)
+{
+	bool is_goal = false;
+	u32int inode_mode;
+	u32int next_inode;
+	int r;
+	int len;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_dir_search_result result;
+	struct ext4_inode_ref ref;
+	if (!mp)
+		return -1;
+	struct ext4_fs *const fs = &mp->fs;
+	struct ext4_sblock *const sb = &mp->fs.sb;
+	/*Skip mount point*/
+	path += strlen(mp->name);
+	/*Load root*/
+	r = ext4_fs_get_inode_ref(fs, EXT4_INODE_ROOT_INDEX, &ref);
+	if (r != 0)
+		return r;
+	while (1) {
+		len = ext4_path_check(path, &is_goal);
+		if (!len) {
+			/*If root open was request.*/
+			werrstr(Enotfound);
+			r = -1;
+			break;
+		}
+		r = ext4_dir_find_entry(&result, &ref, path, len);
+		if (r != 0) {
+			/*Destroy last result*/
+			ext4_dir_destroy_result(&ref, &result);
+			if (r != EXT4_ERR_NOT_FOUND || !is_goal)
+				break;
+			/*Link with root dir.*/
+			r = ext4_link(mp, &ref, child_ref, path, len, rename);
+			break;
+		} else if (is_goal) {
+			/* The target name is already taken. */
+			/*Destroy last result*/
+			ext4_dir_destroy_result(&ref, &result);
+			werrstr(Eexists);
+			r = -1;
+			break;
+		}
+		/* Use the accessor, as ext4_generic_open2() does, instead
+		 * of reading the raw on-disk field. */
+		next_inode = ext4_dir_en_get_inode(result.dentry);
+		if (ext4_sb_feature_incom(sb, EXT4_FINCOM_FILETYPE)) {
+			u8int t;
+			t = ext4_dir_en_get_inode_type(sb, result.dentry);
+			inode_mode = ext4_fs_correspond_inode_mode(t);
+		} else {
+			struct ext4_inode_ref child_ref;
+			r = ext4_fs_get_inode_ref(fs, next_inode, &child_ref);
+			if (r != 0) {
+				/* The search result is still held; release
+				 * it before leaving the loop. */
+				ext4_dir_destroy_result(&ref, &result);
+				break;
+			}
+			inode_mode = ext4_inode_type(sb, child_ref.inode);
+			ext4_fs_put_inode_ref(&child_ref);
+		}
+		r = ext4_dir_destroy_result(&ref, &result);
+		if (r != 0)
+			break;
+		if (inode_mode != EXT4_INODE_MODE_DIRECTORY) {
+			werrstr(is_goal ? Eexists : Enotfound);
+			r = -1;
+			break;
+		}
+		/* Step down into the directory just found. */
+		r = ext4_fs_put_inode_ref(&ref);
+		if (r != 0)
+			break;
+		r = ext4_fs_get_inode_ref(fs, next_inode, &ref);
+		if (r != 0)
+			break;
+		if (is_goal)
+			break;
+		path += len + 1;
+	}
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&ref);
+		return r;
+	}
+	r = ext4_fs_put_inode_ref(&ref);
+	return r;
+}

+/*
+ * Remove the directory entry at path + name_off from parent_ref.
+ * When child_ref is a directory, the parent's link count is also
+ * decremented and the parent is marked dirty.
+ * Returns -1 for an unknown mount point, otherwise the result of
+ * ext4_dir_remove_entry().
+ */
+static int ext4_remove_orig_reference(const char *path, u32int name_off,
+				      struct ext4_inode_ref *parent_ref,
+				      struct ext4_inode_ref *child_ref)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	const char *name;
+	bool last;
+	int namelen;
+	int r;
+	if (mp == nil)
+		return -1;
+	/* The entry name starts name_off bytes into the full path. */
+	name = path + name_off;
+	namelen = ext4_path_check(name, &last);
+	r = ext4_dir_remove_entry(parent_ref, name, namelen);
+	if (r == 0 && ext4_inode_is_type(&mp->fs.sb, child_ref->inode,
+					 EXT4_INODE_MODE_DIRECTORY)) {
+		ext4_fs_inode_links_count_dec(parent_ref);
+		parent_ref->dirty = true;
+	}
+	return r;
+}

+/*
+ * Create hardlink_path as an additional name for the existing file
+ * path. Both paths must resolve to the same mount point, the file
+ * system must be writable, and path must not be a directory.
+ * The link is created inside a transaction that is stopped on success
+ * and aborted on failure. Returns 0 on success.
+ */
+int ext4_flink(const char *path, const char *hardlink_path)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_mountpoint *target_mp = ext4_get_mount(hardlink_path);
+	struct ext4_inode_ref child_ref;
+	u32int parent_inode, child_inode;
+	u32int name_off;
+	ext4_file f;
+	int r;
+	if (mp == nil)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	/* Will that happen? */
+	if (target_mp != mp) {
+		werrstr("mount point must be the same: %s vs %s", path, hardlink_path);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	/* Resolve the source only to learn its inode number. */
+	r = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, &parent_inode, &name_off);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+	child_inode = f.inode;
+	ext4_fclose(&f);
+	ext4_trans_start(mp);
+	r = ext4_fs_get_inode_ref(&mp->fs, child_inode, &child_ref);
+	if (r == 0) {
+		/* Creating hardlink for directory is not allowed. */
+		if (ext4_inode_is_type(&mp->fs.sb, child_ref.inode, EXT4_INODE_MODE_DIRECTORY)) {
+			werrstr("is a directory");
+			r = -1;
+		} else {
+			r = ext4_create_hardlink(hardlink_path, &child_ref, false);
+		}
+		ext4_fs_put_inode_ref(&child_ref);
+	}
+	if (r == 0)
+		ext4_trans_stop(mp);
+	else
+		ext4_trans_abort(mp);
+	EXT4_MP_UNLOCK(mp);
+	return r;
+}

+/*
+ * Rename path to new_path.
+ *
+ * The new name is created first as a link to the same inode
+ * (ext4_create_hardlink() with rename set), then the original entry is
+ * removed from its parent directory. Both steps run in one transaction
+ * that is stopped on success and aborted on failure.
+ *
+ * Both paths must live on the same mount point: an inode reference
+ * from one file system must not be linked into a directory of another.
+ * ext4_flink() enforces the same rule.
+ * Returns 0 on success.
+ */
+int ext4_frename(const char *path, const char *new_path)
+{
+	int r;
+	ext4_file f;
+	u32int name_off;
+	bool parent_loaded = false, child_loaded = false;
+	u32int parent_inode, child_inode;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref child_ref, parent_ref;
+	if (!mp)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	if (mp != ext4_get_mount(new_path)) {
+		werrstr("mount point must be the same: %s vs %s", path, new_path);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	/* Resolve the source to learn its inode, its parent directory
+	 * and where its name starts inside path. */
+	r = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN,
+				&parent_inode, &name_off);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+	child_inode = f.inode;
+	ext4_fclose(&f);
+	ext4_trans_start(mp);
+	/*Load parent*/
+	r = ext4_fs_get_inode_ref(&mp->fs, parent_inode, &parent_ref);
+	if (r != 0)
+		goto Finish;
+	parent_loaded = true;
+	/*We have file to unlink. Load it.*/
+	r = ext4_fs_get_inode_ref(&mp->fs, child_inode, &child_ref);
+	if (r != 0)
+		goto Finish;
+	child_loaded = true;
+	r = ext4_create_hardlink(new_path, &child_ref, true);
+	if (r != 0)
+		goto Finish;
+	r = ext4_remove_orig_reference(path, name_off, &parent_ref, &child_ref);
+Finish:
+	if (parent_loaded)
+		ext4_fs_put_inode_ref(&parent_ref);
+	if (child_loaded)
+		ext4_fs_put_inode_ref(&child_ref);
+	if (r != 0)
+		ext4_trans_abort(mp);
+	else
+		ext4_trans_stop(mp);
+	EXT4_MP_UNLOCK(mp);
+	return r;
+}

+/****************************************************************************/

+/*
+ * Store in *sb a pointer to the in-memory superblock of the file
+ * system mounted at mount_point. No lock is taken.
+ * Returns 0 on success, -1 for an unknown mount point.
+ */
+int ext4_get_sblock(const char *mount_point, struct ext4_sblock **sb)
+{
+	struct ext4_mountpoint *mp;
+	mp = ext4_get_mount(mount_point);
+	if (mp == nil)
+		return -1;
+	*sb = &mp->fs.sb;
+	return 0;
+}

+/*
+ * Switch write-back cache mode on or off for the block device behind
+ * the mount point that path belongs to, under the mount point lock.
+ * Returns -1 for an unknown mount point, otherwise the result of
+ * ext4_block_cache_write_back().
+ */
+int ext4_cache_write_back(const char *path, bool on)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	int rc = -1;
+	if (mp != nil) {
+		EXT4_MP_LOCK(mp);
+		rc = ext4_block_cache_write_back(mp->fs.bdev, on);
+		EXT4_MP_UNLOCK(mp);
+	}
+	return rc;
+}

+/*
+ * Flush the block cache of the device behind the mount point that path
+ * belongs to, under the mount point lock.
+ * Returns -1 for an unknown mount point, otherwise the result of
+ * ext4_block_cache_flush().
+ */
+int ext4_cache_flush(const char *path)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	int rc = -1;
+	if (mp != nil) {
+		EXT4_MP_LOCK(mp);
+		rc = ext4_block_cache_flush(mp->fs.bdev);
+		EXT4_MP_UNLOCK(mp);
+	}
+	return rc;
+}

+/*
+ * Remove the non-directory file at path.
+ *
+ * The entry is unlinked from its parent directory. When this was the
+ * last link, the file data is truncated beforehand and the inode is
+ * freed afterwards. Everything runs in one transaction that is stopped
+ * on success and aborted on failure.
+ *
+ * Returns 0 on success. A directory is refused with -1 and
+ * "is a directory"; previously that case returned 0 although nothing
+ * had been removed.
+ */
+int ext4_fremove(const char *path)
+{
+	ext4_file f;
+	u32int parent_inode;
+	u32int child_inode;
+	u32int name_off;
+	bool is_goal;
+	int r;
+	int len;
+	struct ext4_inode_ref child;
+	struct ext4_inode_ref parent;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	if (!mp)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	/* Resolve the path to learn the inode, its parent directory and
+	 * where the file name starts inside path. */
+	r = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, &parent_inode, &name_off);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+	child_inode = f.inode;
+	ext4_fclose(&f);
+	ext4_trans_start(mp);
+	/*Load parent*/
+	r = ext4_fs_get_inode_ref(&mp->fs, parent_inode, &parent);
+	if (r != 0) {
+		ext4_trans_abort(mp);
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+	/*We have file to delete. Load it.*/
+	r = ext4_fs_get_inode_ref(&mp->fs, child_inode, &child);
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&parent);
+		ext4_trans_abort(mp);
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+	/* Directories are not removed here. */
+	if (ext4_inode_type(&mp->fs.sb, child.inode) ==
+	    EXT4_INODE_MODE_DIRECTORY) {
+		werrstr("is a directory");
+		r = -1;
+		goto Finish;
+	}
+	/*Link count will be zero, the inode should be freed. */
+	if (ext4_inode_get_links_cnt(child.inode) == 1) {
+		ext4_block_cache_write_back(mp->fs.bdev, 1);
+		r = ext4_trunc_inode(mp, child.index, 0);
+		/* Leave write-back mode on the error path as well. */
+		ext4_block_cache_write_back(mp->fs.bdev, 0);
+		if (r != 0)
+			goto Finish;
+	}
+	/*Set path*/
+	path += name_off;
+	len = ext4_path_check(path, &is_goal);
+	/*Unlink from parent*/
+	r = ext4_unlink(mp, &parent, &child, path, len);
+	if (r != 0)
+		goto Finish;
+	/*Link count is zero, the inode should be freed. */
+	if (!ext4_inode_get_links_cnt(child.inode)) {
+		ext4_inode_set_del_time(child.inode, -1L);
+		r = ext4_fs_free_inode(&child);
+	}
+Finish:
+	ext4_fs_put_inode_ref(&child);
+	ext4_fs_put_inode_ref(&parent);
+	if (r != 0)
+		ext4_trans_abort(mp);
+	else
+		ext4_trans_stop(mp);
+	EXT4_MP_UNLOCK(mp);
+	return r;
+}

+/*
+ * Open the regular file at path with an fopen-style mode string
+ * (see ext4_parse_flags()) and fill in file.
+ * The open runs under the mount point lock with write-back cache mode
+ * switched on for its duration.
+ * Returns -1 for an unknown mount point, otherwise the result of
+ * ext4_generic_open().
+ */
+int ext4_fopen(ext4_file *file, const char *path, const char *flags)
+{
+	struct ext4_mountpoint *mp;
+	int rc;
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+	/* true: a regular file is expected; no parent/offset wanted */
+	rc = ext4_generic_open(file, path, flags, true, nil, nil);
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/*
+ * Open the regular file at path with numeric O_* flags and fill in
+ * file. Runs under the mount point lock with write-back cache mode on.
+ * With O_CREAT the open is wrapped in a transaction that is stopped on
+ * success and aborted on failure.
+ * Returns -1 for an unknown mount point, otherwise the result of
+ * ext4_generic_open2().
+ */
+int ext4_fopen2(ext4_file *file, const char *path, int flags)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	int creating = flags & O_CREAT;
+	int rc;
+	if (mp == nil)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+	if (creating)
+		ext4_trans_start(mp);
+	rc = ext4_generic_open2(file, path, flags, EXT4_DE_REG_FILE, nil, nil);
+	if (creating) {
+		if (rc != 0)
+			ext4_trans_abort(mp);
+		else
+			ext4_trans_stop(mp);
+	}
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/*
+ * Close a file handle by resetting its fields; nothing else is done.
+ * The handle must be open (file->mp set), which is asserted.
+ * Always returns 0.
+ */
+int ext4_fclose(ext4_file *file)
+{
+	assert(file && file->mp);
+	/* position and size */
+	file->fsize = 0;
+	file->fpos = 0;
+	/* identity */
+	file->inode = 0;
+	file->flags = 0;
+	file->mp = 0;
+	return 0;
+}

+/*
+ * Shrink the open file to size bytes. The caller holds the mount point
+ * lock and owns the surrounding transaction; this function must not
+ * touch the lock itself (it used to unlock on one error path, which
+ * made ext4_ftruncate() unlock twice).
+ *
+ * file->fsize is refreshed from the inode first. Only shrinking is
+ * supported: a size that is not smaller than the current one fails
+ * with "space preallocation not supported".
+ * On success file->fsize is updated and file->fpos is clamped to the
+ * new size. Returns 0 on success.
+ */
+static int ext4_ftruncate_no_lock(ext4_file *file, u64int size)
+{
+	struct ext4_inode_ref ref;
+	int r;
+	r = ext4_fs_get_inode_ref(&file->mp->fs, file->inode, &ref);
+	if (r != 0)
+		return r;
+	/*Sync file size*/
+	file->fsize = ext4_inode_get_size(&file->mp->fs.sb, ref.inode);
+	if (file->fsize <= size) {
+		werrstr("space preallocation not supported");
+		r = -1;
+		goto Finish;
+	}
+	/*Start write back cache mode.*/
+	r = ext4_block_cache_write_back(file->mp->fs.bdev, 1);
+	if (r != 0)
+		goto Finish;
+	r = ext4_trunc_inode(file->mp, ref.index, size);
+	if (r == 0) {
+		file->fsize = size;
+		if (file->fpos > size)
+			file->fpos = size;
+	}
+	/*Stop write back cache mode, also when truncation failed*/
+	ext4_block_cache_write_back(file->mp->fs.bdev, 0);
+Finish:
+	ext4_fs_put_inode_ref(&ref);
+	return r;
+}

+/*
+ * Shrink the open file f to size bytes.
+ * Fails with Erdonlyfs on a read-only file system and with Eperm when
+ * the O_RDONLY test on f->flags matches. The truncation itself is done
+ * by ext4_ftruncate_no_lock() under the mount point lock, inside a
+ * transaction that is stopped on success and aborted on failure.
+ */
+int ext4_ftruncate(ext4_file *f, u64int size)
+{
+	struct ext4_mountpoint *mp;
+	int rc;
+	assert(f && f->mp);
+	mp = f->mp;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	/* NOTE(review): if O_RDONLY is defined as 0 this test can never
+	 * be true - confirm against the O_* definitions. */
+	if (f->flags & O_RDONLY) {
+		werrstr(Eperm);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	ext4_trans_start(mp);
+	rc = ext4_ftruncate_no_lock(f, size);
+	if (rc != 0)
+		ext4_trans_abort(mp);
+	else
+		ext4_trans_stop(mp);
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/*
+ * Read up to size bytes from the current position of file into buf.
+ *
+ * The request is clamped to the bytes left before end of file, then
+ * served in three parts: an unaligned head up to the next block
+ * boundary, runs of whole blocks read directly, and a partial tail.
+ * A short symlink whose target is stored inside the inode is copied
+ * straight from the inode.
+ *
+ * rcnt, when not nil, receives the number of bytes read; it is zeroed
+ * for a zero-length request too. file->fpos advances by the bytes read
+ * in the block paths. Returns 0 on success, including at end of file.
+ *
+ * A block index that maps to physical block 0 is treated as a hole and
+ * read as zeros in the head and in the tail.
+ * NOTE(review): the whole-block loop does not special-case holes -
+ * confirm how sparse files are expected to behave there.
+ */
+int ext4_fread(ext4_file *file, void *buf, usize size, usize *rcnt)
+{
+	u32int unalg;
+	u32int iblock_idx;
+	u32int iblock_last;
+	u32int block_size;
+	ext4_fsblk_t fblock;
+	ext4_fsblk_t fblock_start;
+	u32int fblock_count;
+	u8int *u8_buf = buf;
+	int r;
+	struct ext4_inode_ref ref;
+	assert(file && file->mp);
+	if (file->flags & O_WRONLY) {
+		werrstr(Eperm);
+		return -1;
+	}
+	if (rcnt)
+		*rcnt = 0;
+	if (!size)
+		return 0;
+	EXT4_MP_LOCK(file->mp);
+	struct ext4_fs *const fs = &file->mp->fs;
+	struct ext4_sblock *const sb = &file->mp->fs.sb;
+	r = ext4_fs_get_inode_ref(fs, file->inode, &ref);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(file->mp);
+		return r;
+	}
+	/*Sync file size*/
+	file->fsize = ext4_inode_get_size(sb, ref.inode);
+	/* Nothing left to read. This also keeps fsize - fpos below from
+	 * wrapping around when the position lies past the end. r is 0. */
+	if (file->fpos >= file->fsize)
+		goto Finish;
+	block_size = ext4_sb_get_block_size(sb);
+	size = ((u64int)size > (file->fsize - file->fpos))
+		? ((usize)(file->fsize - file->fpos)) : size;
+	iblock_idx = (u32int)((file->fpos) / block_size);
+	iblock_last = (u32int)((file->fpos + size) / block_size);
+	unalg = (file->fpos) % block_size;
+	/*If the size of symlink is smaller than 60 bytes*/
+	bool softlink;
+	softlink = ext4_inode_is_type(sb, ref.inode, EXT4_INODE_MODE_SOFTLINK);
+	if (softlink && file->fsize < sizeof(ref.inode->blocks)
+		     && !ext4_inode_get_blocks_count(sb, ref.inode)) {
+		char *content = (char *)ref.inode->blocks;
+		if (file->fpos < file->fsize) {
+			usize len = size;
+			if (unalg + size > (u32int)file->fsize)
+				len = (u32int)file->fsize - unalg;
+			memcpy(buf, content + unalg, len);
+			if (rcnt)
+				*rcnt = len;
+		}
+		r = 0;
+		goto Finish;
+	}
+	/* Head: from an unaligned position up to the block boundary. */
+	if (unalg) {
+		usize len =  size;
+		if (size > (block_size - unalg))
+			len = block_size - unalg;
+		r = ext4_fs_get_inode_dblk_idx(&ref, iblock_idx, &fblock, true);
+		if (r != 0)
+			goto Finish;
+		/* Do we get an unwritten range? */
+		if (fblock != 0) {
+			u64int off = fblock * block_size + unalg;
+			r = ext4_block_readbytes(file->mp->fs.bdev, off, u8_buf, len);
+			if (r != 0)
+				goto Finish;
+		} else {
+			/* Yes, we do. */
+			memset(u8_buf, 0, len);
+		}
+		u8_buf += len;
+		size -= len;
+		file->fpos += len;
+		if (rcnt)
+			*rcnt += len;
+		iblock_idx++;
+	}
+	/* Body: gather physically consecutive blocks and read each run
+	 * with one direct transfer. */
+	fblock_start = 0;
+	fblock_count = 0;
+	while (size >= block_size) {
+		while (iblock_idx < iblock_last) {
+			r = ext4_fs_get_inode_dblk_idx(&ref, iblock_idx,
+						       &fblock, true);
+			if (r != 0)
+				goto Finish;
+			iblock_idx++;
+			if (!fblock_start)
+				fblock_start = fblock;
+			if ((fblock_start + fblock_count) != fblock)
+				break;
+			fblock_count++;
+		}
+		r = ext4_blocks_get_direct(file->mp->fs.bdev, u8_buf, fblock_start,
+					   fblock_count);
+		if (r != 0)
+			goto Finish;
+		size -= block_size * fblock_count;
+		u8_buf += block_size * fblock_count;
+		file->fpos += block_size * fblock_count;
+		if (rcnt)
+			*rcnt += block_size * fblock_count;
+		/* The block that ended the run starts the next one. */
+		fblock_start = fblock;
+		fblock_count = 1;
+	}
+	/* Tail: the remaining part of the last block. */
+	if (size) {
+		r = ext4_fs_get_inode_dblk_idx(&ref, iblock_idx, &fblock, true);
+		if (r != 0)
+			goto Finish;
+		/* Same hole handling as in the head. */
+		if (fblock != 0) {
+			u64int off = fblock * block_size;
+			r = ext4_block_readbytes(file->mp->fs.bdev, off, u8_buf, size);
+			if (r != 0)
+				goto Finish;
+		} else {
+			memset(u8_buf, 0, size);
+		}
+		file->fpos += size;
+		if (rcnt)
+			*rcnt += size;
+	}
+Finish:
+	ext4_fs_put_inode_ref(&ref);
+	EXT4_MP_UNLOCK(file->mp);
+	return r;
+}

+/*
+ * Write size bytes from buf at the current position of file.
+ *
+ * The data is written in three parts: an unaligned head up to the next
+ * block boundary, runs of whole blocks written directly (in write-back
+ * cache mode), and a partial tail. Blocks past the current end of the
+ * file are appended as needed. If the position ends up beyond the old
+ * size, the inode size is updated.
+ *
+ * wcnt, when not nil, receives the number of bytes written.
+ * Everything runs under the mount point lock, in one transaction.
+ *
+ * Return value: 0 on success. When appending a new block fails, the
+ * write ends early but still returns 0 with a short *wcnt (behaviour
+ * kept from the original). Any other failure is returned to the caller
+ * and the transaction is aborted; previously such errors were
+ * overwritten by the result of releasing the inode reference.
+ *
+ * NOTE(review): if O_RDONLY is defined as 0 the O_RDONLY test below
+ * can never be true - confirm against the O_* definitions.
+ */
+int ext4_fwrite(ext4_file *file, const void *buf, usize size, usize *wcnt)
+{
+	u32int unalg;
+	u32int iblk_idx;
+	u32int iblock_last;
+	u32int ifile_blocks;
+	u32int block_size;
+	u32int fblock_count;
+	ext4_fsblk_t fblk;
+	ext4_fsblk_t fblock_start;
+	struct ext4_inode_ref ref;
+	const u8int *u8_buf = buf;
+	int r, rr = 0;
+	assert(file && file->mp);
+	if (file->mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	if (file->flags & O_RDONLY) {
+		werrstr(Eperm);
+		return -1;
+	}
+	if (!size)
+		return 0;
+	EXT4_MP_LOCK(file->mp);
+	ext4_trans_start(file->mp);
+	struct ext4_fs *const fs = &file->mp->fs;
+	struct ext4_sblock *const sb = &file->mp->fs.sb;
+	if (wcnt)
+		*wcnt = 0;
+	r = ext4_fs_get_inode_ref(fs, file->inode, &ref);
+	if (r != 0) {
+		ext4_trans_abort(file->mp);
+		EXT4_MP_UNLOCK(file->mp);
+		return r;
+	}
+	/*Sync file size*/
+	file->fsize = ext4_inode_get_size(sb, ref.inode);
+	block_size = ext4_sb_get_block_size(sb);
+	iblock_last = (u32int)((file->fpos + size) / block_size);
+	iblk_idx = (u32int)(file->fpos / block_size);
+	ifile_blocks = (u32int)((file->fsize + block_size - 1) / block_size);
+	unalg = (file->fpos) % block_size;
+	/* Head: from an unaligned position up to the block boundary. */
+	if (unalg) {
+		usize len =  size;
+		u64int off;
+		if (size > (block_size - unalg))
+			len = block_size - unalg;
+		r = ext4_fs_init_inode_dblk_idx(&ref, iblk_idx, &fblk);
+		if (r != 0)
+			goto Finish;
+		off = fblk * block_size + unalg;
+		r = ext4_block_writebytes(file->mp->fs.bdev, off, u8_buf, len);
+		if (r != 0)
+			goto Finish;
+		u8_buf += len;
+		size -= len;
+		file->fpos += len;
+		if (wcnt)
+			*wcnt += len;
+		iblk_idx++;
+	}
+	/*Start write back cache mode.*/
+	r = ext4_block_cache_write_back(file->mp->fs.bdev, 1);
+	if (r != 0)
+		goto Finish;
+	/* Body: gather physically consecutive blocks and write each run
+	 * with one direct transfer. Every exit from this loop goes through
+	 * the write-back stop below. */
+	fblock_start = 0;
+	fblock_count = 0;
+	while (size >= block_size) {
+		while (iblk_idx < iblock_last) {
+			if (iblk_idx < ifile_blocks) {
+				r = ext4_fs_init_inode_dblk_idx(&ref, iblk_idx,
+								&fblk);
+				if (r != 0)
+					break;
+			} else {
+				rr = ext4_fs_append_inode_dblk(&ref, &fblk,
+							       &iblk_idx);
+				if (rr != 0) {
+					/* Unable to append more blocks. But
+					 * some block might be allocated already
+					 * */
+					break;
+				}
+			}
+			iblk_idx++;
+			if (!fblock_start) {
+				fblock_start = fblk;
+			}
+			if ((fblock_start + fblock_count) != fblk)
+				break;
+			fblock_count++;
+		}
+		/* Block mapping failed: nothing more to write. */
+		if (r != 0)
+			break;
+		r = ext4_blocks_set_direct(file->mp->fs.bdev, u8_buf, fblock_start,
+					   fblock_count);
+		if (r != 0)
+			break;
+		size -= block_size * fblock_count;
+		u8_buf += block_size * fblock_count;
+		file->fpos += block_size * fblock_count;
+		if (wcnt)
+			*wcnt += block_size * fblock_count;
+		/* The block that ended the run starts the next one. */
+		fblock_start = fblk;
+		fblock_count = 1;
+		if (rr != 0) {
+			/* Appending failed and no more blocks can be
+			 * written, but the inode size still has to be
+			 * updated for what was written so far. */
+			break;
+		}
+	}
+	/*Stop write back cache mode*/
+	ext4_block_cache_write_back(file->mp->fs.bdev, 0);
+	if (r != 0)
+		goto Finish;
+	if (rr != 0)
+		goto out_fsize;
+	/* Tail: the remaining part of the last block. */
+	if (size) {
+		u64int off;
+		if (iblk_idx < ifile_blocks) {
+			r = ext4_fs_init_inode_dblk_idx(&ref, iblk_idx, &fblk);
+			if (r != 0)
+				goto Finish;
+		} else {
+			/* An append failure ends the write early without
+			 * an error; r stays 0 here. */
+			rr = ext4_fs_append_inode_dblk(&ref, &fblk, &iblk_idx);
+			if (rr != 0)
+				/*Node size should be updated.*/
+				goto out_fsize;
+		}
+		off = fblk * block_size;
+		r = ext4_block_writebytes(file->mp->fs.bdev, off, u8_buf, size);
+		if (r != 0)
+			goto Finish;
+		file->fpos += size;
+		if (wcnt)
+			*wcnt += size;
+	}
+out_fsize:
+	if (file->fpos > file->fsize) {
+		file->fsize = file->fpos;
+		ext4_inode_set_size(ref.inode, file->fsize);
+		ref.dirty = true;
+	}
+Finish:
+	/* Release the inode, but keep an earlier error if there is one. */
+	rr = ext4_fs_put_inode_ref(&ref);
+	if (r == 0)
+		r = rr;
+	if (r != 0)
+		ext4_trans_abort(file->mp);
+	else
+		ext4_trans_stop(file->mp);
+	EXT4_MP_UNLOCK(file->mp);
+	return r;
+}

+/*
+ * Move the position of file.
+ *   origin 0: position becomes offset (0 <= offset <= fsize)
+ *   origin 1: position moves by offset and must stay within
+ *             [0, fsize]
+ *   origin 2: position becomes fsize - offset (0 <= offset <= fsize)
+ * Returns 0 on success; -1 with Einval for an out-of-range offset or
+ * any other origin, leaving the position unchanged.
+ */
+int ext4_fseek(ext4_file *file, s64int offset, u32int origin)
+{
+	if (origin == 0) {
+		if (offset >= 0 && (u64int)offset <= file->fsize) {
+			file->fpos = offset;
+			return 0;
+		}
+	} else if (origin == 1) {
+		bool before_start = offset < 0 &&
+				    (u64int)(-offset) > file->fpos;
+		bool past_end = offset > 0 &&
+				(u64int)offset > (file->fsize - file->fpos);
+		if (!before_start && !past_end) {
+			file->fpos += offset;
+			return 0;
+		}
+	} else if (origin == 2) {
+		if (offset >= 0 && (u64int)offset <= file->fsize) {
+			file->fpos = file->fsize - offset;
+			return 0;
+		}
+	}
+	werrstr(Einval);
+	return -1;
+}

+/* Return the current position recorded in the file handle. */
+u64int ext4_ftell(ext4_file *file)
+{
+	u64int pos = file->fpos;
+	return pos;
+}

+/*
+ * Return the file size recorded in the file handle; the inode is not
+ * consulted here.
+ */
+u64int ext4_fsize(ext4_file *file)
+{
+	u64int cached = file->fsize;
+	return cached;
+}

+/*
+ * Resolve path, start a transaction on mp and load the inode into
+ * inode_ref.
+ * On success (0) the transaction is left running; the caller finishes
+ * it with ext4_trans_put_inode_ref(). If path cannot be resolved no
+ * transaction is started; if the inode cannot be loaded the
+ * transaction is aborted here.
+ */
+static int ext4_trans_get_inode_ref(const char *path,
+				    struct ext4_mountpoint *mp,
+				    struct ext4_inode_ref *inode_ref)
+{
+	ext4_file f;
+	int rc;
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0) {
+		ext4_trans_start(mp);
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, inode_ref);
+		if (rc != 0)
+			ext4_trans_abort(mp);
+	}
+	return rc;
+}

+/*
+ * Release inode_ref and finish the transaction on mp: it is stopped
+ * when the release succeeds and aborted otherwise.
+ * Returns the result of ext4_fs_put_inode_ref().
+ */
+static int ext4_trans_put_inode_ref(struct ext4_mountpoint *mp,
+				    struct ext4_inode_ref *inode_ref)
+{
+	int rc = ext4_fs_put_inode_ref(inode_ref);
+	if (rc == 0)
+		ext4_trans_stop(mp);
+	else
+		ext4_trans_abort(mp);
+	return rc;
+}

+/*
+ * Look up path and hand back its inode number and a copy of its raw
+ * inode structure. Either output pointer may be nil to skip it.
+ * Returns 0 on success; -1 for an unknown mount point, otherwise the
+ * error of the lookup or of loading the inode.
+ */
+int ext4_raw_inode_fill(const char *path, u32int *ret_ino,
+			struct ext4_inode *inode)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+	if (mp == nil)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+	if (rc == 0) {
+		if (ret_ino)
+			*ret_ino = f.inode;
+		if (inode)
+			memcpy(inode, inode_ref.inode, sizeof(struct ext4_inode));
+		/* the result of the release is not reported */
+		ext4_fs_put_inode_ref(&inode_ref);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/*
+ * Check whether path exists with directory entry type `type`
+ * (EXT4_DE_UNKNOWN accepts any type, see ext4_generic_open2()).
+ * Returns 0 when it does, -1 for an unknown mount point, otherwise the
+ * lookup error.
+ */
+int ext4_inode_exist(const char *path, int type)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	ext4_file probe;
+	int rc;
+	if (mp == nil)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	rc = ext4_generic_open2(&probe, path, O_RDONLY, type, nil, nil);
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/*
+ * Replace the low 12 bits (mask 0xFFF) of the mode of the inode at
+ * path with those of mode; the remaining mode bits are kept.
+ * Runs under the mount point lock in its own transaction.
+ * Returns 0 on success, -1 for an unknown or read-only mount.
+ */
+int ext4_mode_set(const char *path, u32int mode)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref ref;
+	u32int merged;
+	int rc;
+	if (mp == nil)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	rc = ext4_trans_get_inode_ref(path, mp, &ref);
+	if (rc == 0) {
+		merged = ext4_inode_get_mode(&mp->fs.sb, ref.inode);
+		merged = (merged & ~0xFFF) | (mode & 0xFFF);
+		ext4_inode_set_mode(&mp->fs.sb, ref.inode, merged);
+		ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &ref);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/*
+ * Set the owner (uid) and group (gid) of the inode at path.
+ * Runs under the mount point lock in its own transaction.
+ * Returns 0 on success, -1 for an unknown or read-only mount.
+ */
+int ext4_owner_set(const char *path, u32int uid, u32int gid)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref ref;
+	int rc;
+	if (mp == nil)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	rc = ext4_trans_get_inode_ref(path, mp, &ref);
+	if (rc == 0) {
+		ext4_inode_set_uid(ref.inode, uid);
+		ext4_inode_set_gid(ref.inode, gid);
+		ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &ref);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/*
+ * Store the mode of the inode at path in *mode.
+ * Returns 0 on success; -1 for an unknown mount point, otherwise the
+ * error of the lookup, of loading or of releasing the inode.
+ */
+int ext4_mode_get(const char *path, u32int *mode)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref ref;
+	ext4_file f;
+	int rc;
+	if (mp == nil)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &ref);
+	if (rc == 0) {
+		*mode = ext4_inode_get_mode(&mp->fs.sb, ref.inode);
+		rc = ext4_fs_put_inode_ref(&ref);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/*
+ * Store the owner and group of the inode at path in *uid and *gid.
+ * Returns 0 on success; -1 for an unknown mount point, otherwise the
+ * error of the lookup, of loading or of releasing the inode.
+ */
+int ext4_owner_get(const char *path, u32int *uid, u32int *gid)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref ref;
+	ext4_file f;
+	int rc;
+	if (mp == nil)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &ref);
+	if (rc == 0) {
+		*uid = ext4_inode_get_uid(ref.inode);
+		*gid = ext4_inode_get_gid(ref.inode);
+		rc = ext4_fs_put_inode_ref(&ref);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/*
+ * Set the access time of the inode at path to atime.
+ * Runs under the mount point lock in its own transaction.
+ * Returns 0 on success, -1 for an unknown or read-only mount.
+ */
+int ext4_atime_set(const char *path, u32int atime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref ref;
+	int rc;
+	if (mp == nil)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	rc = ext4_trans_get_inode_ref(path, mp, &ref);
+	if (rc == 0) {
+		ext4_inode_set_access_time(ref.inode, atime);
+		ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &ref);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/*
+ * Set the modification time of the inode at path to mtime.
+ * Runs under the mount point lock in its own transaction.
+ * Returns 0 on success, -1 for an unknown or read-only mount.
+ */
+int ext4_mtime_set(const char *path, u32int mtime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref ref;
+	int rc;
+	if (mp == nil)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	rc = ext4_trans_get_inode_ref(path, mp, &ref);
+	if (rc == 0) {
+		ext4_inode_set_modif_time(ref.inode, mtime);
+		ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &ref);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/**@brief Set the inode-change time of the inode that path names.
+ * @param path path of the file or directory
+ * @param ctime value handed to ext4_inode_set_change_inode_time()
+ * @return 0 on success, -1 when no mount point matches path or the file
+ *         system is read-only, otherwise the error of the failing call
+ */
+int ext4_ctime_set(const char *path, u32int ctime)
+{
+	struct ext4_mountpoint *mp;
+	struct ext4_inode_ref ref;
+	int rc;
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	rc = ext4_trans_get_inode_ref(path, mp, &ref);
+	if (rc == 0) {
+		ext4_inode_set_change_inode_time(ref.inode, ctime);
+		/* Mark the inode modified before handing the reference back. */
+		ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &ref);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/**@brief Read the access time of the inode that path names.
+ * @param path path of the file or directory
+ * @param atime receives the value of ext4_inode_get_access_time()
+ * @return 0 on success, -1 when no mount point matches path, otherwise
+ *         the error of the failing open or inode-reference call
+ */
+int ext4_atime_get(const char *path, u32int *atime)
+{
+	struct ext4_mountpoint *mp;
+	struct ext4_inode_ref ref;
+	ext4_file file;
+	int rc;
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	rc = ext4_generic_open2(&file, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, file.inode, &ref);
+	if (rc == 0) {
+		*atime = ext4_inode_get_access_time(ref.inode);
+		rc = ext4_fs_put_inode_ref(&ref);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/**@brief Read the modification time of the inode that path names.
+ * @param path path of the file or directory
+ * @param mtime receives the value of ext4_inode_get_modif_time()
+ * @return 0 on success, -1 when no mount point matches path, otherwise
+ *         the error of the failing open or inode-reference call
+ */
+int ext4_mtime_get(const char *path, u32int *mtime)
+{
+	struct ext4_mountpoint *mp;
+	struct ext4_inode_ref ref;
+	ext4_file file;
+	int rc;
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	rc = ext4_generic_open2(&file, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, file.inode, &ref);
+	if (rc == 0) {
+		*mtime = ext4_inode_get_modif_time(ref.inode);
+		rc = ext4_fs_put_inode_ref(&ref);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/**@brief Read the inode-change time of the inode that path names.
+ * @param path path of the file or directory
+ * @param ctime receives the value of ext4_inode_get_change_inode_time()
+ * @return 0 on success, -1 when no mount point matches path, otherwise
+ *         the error of the failing open or inode-reference call
+ */
+int ext4_ctime_get(const char *path, u32int *ctime)
+{
+	struct ext4_mountpoint *mp;
+	struct ext4_inode_ref ref;
+	ext4_file file;
+	int rc;
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	rc = ext4_generic_open2(&file, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, file.inode, &ref);
+	if (rc == 0) {
+		*ctime = ext4_inode_get_change_inode_time(ref.inode);
+		rc = ext4_fs_put_inode_ref(&ref);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/**@brief Store a symlink target in the inode behind an open file.
+ *
+ * The file is truncated to zero first. A target shorter than the inode's
+ * blocks array is copied straight into that array; anything longer gets one
+ * data block appended and the target written into it.
+ *
+ * @param f    open file (f and f->mp must be non-nil)
+ * @param buf  target bytes
+ * @param size number of bytes in buf; 0 is a no-op, and values larger than
+ *             the file system block size are rejected
+ * @return 0 on success, -1 with errstr "invalid block size" when size
+ *         exceeds one block, otherwise the error of the failing step
+ */
+static int ext4_fsymlink_set(ext4_file *f, const void *buf, u32int size)
+{
+	struct ext4_inode_ref ref;
+	u32int sblock;
+	ext4_fsblk_t fblock;
+	u32int block_size;
+	int r;
+	int put_r;
+	assert(f && f->mp);
+	if (!size)
+		return 0;
+	r = ext4_fs_get_inode_ref(&f->mp->fs, f->inode, &ref);
+	if (r != 0)
+		return r;
+	/* The whole target has to fit into a single block. */
+	block_size = ext4_sb_get_block_size(&f->mp->fs.sb);
+	if (size > block_size) {
+		werrstr("invalid block size");
+		r = -1;
+		goto Finish;
+	}
+	r = ext4_ftruncate_no_lock(f, 0);
+	if (r != 0)
+		goto Finish;
+	/*Start write back cache mode.*/
+	r = ext4_block_cache_write_back(f->mp->fs.bdev, 1);
+	if (r != 0)
+		goto Finish;
+	/*If the size of symlink is smaller than 60 bytes*/
+	if (size < sizeof(ref.inode->blocks)) {
+		memset(ref.inode->blocks, 0, sizeof(ref.inode->blocks));
+		memcpy(ref.inode->blocks, buf, size);
+		ext4_inode_clear_flag(ref.inode, EXT4_INODE_FLAG_EXTENTS);
+	} else {
+		u64int off;
+		ext4_fs_inode_blocks_init(&f->mp->fs, &ref);
+		r = ext4_fs_append_inode_dblk(&ref, &fblock, &sblock);
+		if (r == 0) {
+			off = fblock * block_size;
+			r = ext4_block_writebytes(f->mp->fs.bdev, off, buf, size);
+		}
+	}
+	/*
+	 * Stop write back cache mode. This must also run when the append or
+	 * the write above failed, otherwise the start call is left unmatched.
+	 */
+	ext4_block_cache_write_back(f->mp->fs.bdev, 0);
+	if (r != 0)
+		goto Finish;
+	ext4_inode_set_size(ref.inode, size);
+	ref.dirty = true;
+	f->fsize = size;
+	if (f->fpos > size)
+		f->fpos = size;
+Finish:
+	/* Report a failed inode write-back unless an earlier error is pending. */
+	put_r = ext4_fs_put_inode_ref(&ref);
+	if (r == 0)
+		r = put_r;
+	return r;
+}

+/**@brief Create a symbolic link at path that holds target.
+ * @param target NUL-terminated string stored as the link content
+ * @param path   location of the new link
+ * @return 0 on success, -1 when no mount point matches path or the file
+ *         system is read-only, otherwise the error of the failing step
+ */
+int ext4_fsymlink(const char *target, const char *path)
+{
+	struct ext4_mountpoint *mp;
+	ext4_file link;
+	int rc;
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+	ext4_trans_start(mp);
+	rc = ext4_generic_open2(&link, path, O_RDWR | O_CREAT, EXT4_DE_SYMLINK, nil, nil);
+	if (rc == 0) {
+		rc = ext4_fsymlink_set(&link, target, strlen(target));
+		ext4_fclose(&link);
+	}
+	/* Commit on success, roll back on any failure. */
+	if (rc == 0)
+		ext4_trans_stop(mp);
+	else
+		ext4_trans_abort(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/**@brief Read the content of the symbolic link at path.
+ * @param path    location of the link
+ * @param buf     destination buffer, must not be nil
+ * @param bufsize size passed to ext4_fread()
+ * @param rcnt    passed to ext4_fread() as its read-count output
+ * @return 0 on success, -1 when no mount point matches path, otherwise
+ *         the error of the failing open or read
+ */
+int ext4_readlink(const char *path, char *buf, usize bufsize, usize *rcnt)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	ext4_file link;
+	int rc;
+	assert(buf != nil);
+	if (mp == nil)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+	rc = ext4_generic_open2(&link, path, O_RDONLY, EXT4_DE_SYMLINK, nil, nil);
+	if (rc == 0) {
+		rc = ext4_fread(&link, buf, bufsize, rcnt);
+		ext4_fclose(&link);
+	}
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/**@brief Record a device number in the inode behind an open file.
+ * The inode size, f->fsize and f->fpos are all reset to zero.
+ * @param f   open file (f and f->mp must be non-nil)
+ * @param dev value handed to ext4_inode_set_dev()
+ * @return 0 on success, otherwise the error from getting or putting the
+ *         inode reference
+ */
+static int ext4_mknod_set(ext4_file *f, u32int dev)
+{
+	struct ext4_inode_ref node;
+	int rc;
+	assert(f && f->mp);
+	rc = ext4_fs_get_inode_ref(&f->mp->fs, f->inode, &node);
+	if (rc != 0)
+		return rc;
+	ext4_inode_set_dev(node.inode, dev);
+	ext4_inode_set_size(node.inode, 0);
+	node.dirty = true;
+	f->fsize = 0;
+	f->fpos = 0;
+	return ext4_fs_put_inode_ref(&node);
+}

+/**@brief Create a special file (character/block device, FIFO or socket).
+ * @param path     location of the new node
+ * @param filetype one of EXT4_DE_CHRDEV, EXT4_DE_BLKDEV, EXT4_DE_FIFO or
+ *                 EXT4_DE_SOCK; any other value is rejected with Einval
+ * @param dev      device number, stored only for character and block
+ *                 devices
+ * @return 0 on success, -1 when no mount point matches path, the file
+ *         system is read-only or filetype is not accepted, otherwise the
+ *         error of the failing step
+ */
+int ext4_mknod(const char *path, int filetype, u32int dev)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	ext4_file node;
+	int rc;
+	if (mp == nil)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	/*
+	 * Only the four special types are accepted; regular files,
+	 * directories, symlinks, unknown and bogus values are all refused.
+	 */
+	switch (filetype) {
+	case EXT4_DE_CHRDEV:
+	case EXT4_DE_BLKDEV:
+	case EXT4_DE_FIFO:
+	case EXT4_DE_SOCK:
+		break;
+	default:
+		werrstr(Einval);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+	ext4_trans_start(mp);
+	rc = ext4_generic_open2(&node, path, O_RDWR | O_CREAT, filetype, nil, nil);
+	if (rc == 0) {
+		/* Only device nodes carry a device number. */
+		if (filetype == EXT4_DE_CHRDEV || filetype == EXT4_DE_BLKDEV)
+			rc = ext4_mknod_set(&node, dev);
+		ext4_fclose(&node);
+	}
+	if (rc == 0)
+		ext4_trans_stop(mp);
+	else
+		ext4_trans_abort(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/*********************************DIRECTORY OPERATION************************/

+/**@brief Remove a directory together with everything below it.
+ *
+ * The tree is walked iteratively. An entry without children is truncated,
+ * unlinked and its inode freed on the spot. An entry that still has
+ * children becomes the new current directory (depth grows by one) and the
+ * walk continues inside it. When a directory has no more entries the walk
+ * returns to inode_up, which is taken from the ".." entry or from the
+ * directory that was just descended from. Once depth reaches zero the
+ * directory named by path is unlinked from its parent.
+ *
+ * @param path directory to remove
+ * @return 0 on success, -1 when no mount point matches path or the file
+ *         system is read-only, otherwise the first error encountered
+ */
+int ext4_dir_rm(const char *path)
+{
+	int r;
+	int len;
+	ext4_file f;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref act;
+	struct ext4_inode_ref child;
+	struct ext4_dir_iter it;
+	u32int name_off;
+	u32int inode_up;
+	u32int inode_current;
+	u32int depth = 1;
+	bool has_children;
+	bool is_goal;
+	bool dir_end;
+	if (!mp)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	struct ext4_fs *const fs = &mp->fs;
+	/*Check if exist.*/
+	r = ext4_generic_open(&f, path, "r", false, &inode_up, &name_off);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+	/* From here on path points at the final component only. */
+	path += name_off;
+	len = ext4_path_check(path, &is_goal);
+	inode_current = f.inode;
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+	do {
+		/* Restart at offset 0 every time the current directory changes. */
+		u64int act_curr_pos = 0;
+		has_children = false;
+		dir_end = false;
+		while (r == 0 && !has_children && !dir_end) {
+			/*Load directory node.*/
+			r = ext4_fs_get_inode_ref(fs, inode_current, &act);
+			if (r != 0) {
+				break;
+			}
+			/*Initialize iterator.*/
+			r = ext4_dir_iterator_init(&it, &act, act_curr_pos);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&act);
+				break;
+			}
+			if (!it.curr) {
+				dir_end = true;
+				goto End;
+			}
+			ext4_trans_start(mp);
+			/*Get up directory inode when ".." entry*/
+			if ((it.curr->name_len == 2) &&
+			    ext4_is_dots(it.curr->name, it.curr->name_len)) {
+				inode_up = ext4_dir_en_get_inode(it.curr);
+			}
+			/*If directory or file entry,  but not "." ".." entry*/
+			if (!ext4_is_dots(it.curr->name, it.curr->name_len)) {
+				/*Get child inode reference do unlink
+				 * directory/file.*/
+				u32int cinode;
+				u32int inode_type;
+				cinode = ext4_dir_en_get_inode(it.curr);
+				r = ext4_fs_get_inode_ref(fs, cinode, &child);
+				if (r != 0)
+					goto End;
+				/*If directory with no leaf children*/
+				r = ext4_has_children(&has_children, &child);
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+				if (has_children) {
+					/*Has directory children. Go into this
+					 * directory.*/
+					inode_up = inode_current;
+					inode_current = cinode;
+					depth++;
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+				inode_type = ext4_inode_type(&mp->fs.sb,
+						child.inode);
+				/* Truncate */
+				if (inode_type != EXT4_INODE_MODE_DIRECTORY)
+					r = ext4_trunc_inode(mp, child.index, 0);
+				else
+					r = ext4_trunc_dir(mp, &act, &child);
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+				/*No children in child directory or file. Just
+				 * unlink.*/
+				r = ext4_unlink(f.mp, &act, &child,
+						(char *)it.curr->name,
+						it.curr->name_len);
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+				ext4_inode_set_del_time(child.inode, -1L);
+				ext4_inode_set_links_cnt(child.inode, 0);
+				child.dirty = true;
+				r = ext4_fs_free_inode(&child);
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+				r = ext4_fs_put_inode_ref(&child);
+				if (r != 0)
+					goto End;
+			}
+			r = ext4_dir_iterator_next(&it);
+			if (r != 0)
+				goto End;
+			/* Remember where to resume in this directory. */
+			act_curr_pos = it.curr_off;
+End:
+			ext4_dir_iterator_fini(&it);
+			/* Keep the first error; only a clean pass reports the put result. */
+			if (r == 0)
+				r = ext4_fs_put_inode_ref(&act);
+			else
+				ext4_fs_put_inode_ref(&act);
+			if (r != 0)
+				ext4_trans_abort(mp);
+			else
+				ext4_trans_stop(mp);
+		}
+		if (dir_end) {
+			/*Directory iterator reached last entry*/
+			depth--;
+			if (depth)
+				inode_current = inode_up;
+		}
+		if (r != 0)
+			break;
+	} while (depth);
+	/*Last unlink*/
+	if (r == 0 && !depth) {
+		/*Load parent.*/
+		struct ext4_inode_ref parent;
+		r = ext4_fs_get_inode_ref(&f.mp->fs, inode_up,
+				&parent);
+		if (r != 0)
+			goto Finish;
+		r = ext4_fs_get_inode_ref(&f.mp->fs, inode_current,
+				&act);
+		if (r != 0) {
+			/* act was never acquired; parent is the reference to release. */
+			ext4_fs_put_inode_ref(&parent);
+			goto Finish;
+		}
+		ext4_trans_start(mp);
+		/* In this place all directories should be
+		 * unlinked.
+		 * Last unlink from root of current directory*/
+		r = ext4_unlink(f.mp, &parent, &act,
+				(char *)path, len);
+		if (r != 0) {
+			ext4_fs_put_inode_ref(&parent);
+			ext4_fs_put_inode_ref(&act);
+			goto Finish;
+		}
+		/* The inode is only released while its link count is still 2. */
+		if (ext4_inode_get_links_cnt(act.inode) == 2) {
+			ext4_inode_set_del_time(act.inode, -1L);
+			ext4_inode_set_links_cnt(act.inode, 0);
+			act.dirty = true;
+			/*Truncate*/
+			r = ext4_trunc_dir(mp, &parent, &act);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&parent);
+				ext4_fs_put_inode_ref(&act);
+				goto Finish;
+			}
+			r = ext4_fs_free_inode(&act);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&parent);
+				ext4_fs_put_inode_ref(&act);
+				goto Finish;
+			}
+		}
+		r = ext4_fs_put_inode_ref(&parent);
+		if (r != 0)
+			goto Finish;
+		r = ext4_fs_put_inode_ref(&act);
+	Finish:
+		if (r != 0)
+			ext4_trans_abort(mp);
+		else
+			ext4_trans_stop(mp);
+	}
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+	return r;
+}

+/**@brief Rename or move a directory; forwards to ext4_frename().
+ * @param path     current location
+ * @param new_path new location
+ * @return whatever ext4_frename() returns
+ */
+int ext4_dir_mv(const char *path, const char *new_path)
+{
+	int rc = ext4_frename(path, new_path);
+	return rc;
+}

+/**@brief Create a directory at path.
+ * @param path location of the new directory
+ * @return 0 on success, -1 when no mount point matches path, the file
+ *         system is read-only or path can already be opened (errstr
+ *         Eexists), otherwise the error from the creating open
+ */
+int ext4_dir_mk(const char *path)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	ext4_file probe;
+	int rc;
+	if (mp == nil)
+		return -1;
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+	EXT4_MP_LOCK(mp);
+	/* A successful read-open means the name is already taken. */
+	if (ext4_generic_open(&probe, path, "r", false, 0, 0) == 0) {
+		werrstr(Eexists);
+		rc = -1;
+	} else {
+		/*Create new directory.*/
+		rc = ext4_generic_open(&probe, path, "w", false, 0, 0);
+	}
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/**@brief Open a directory for iteration with ext4_dir_entry_next().
+ * @param dir  handle to initialise; next_off is reset to 0 even when the
+ *             open fails
+ * @param path directory to open
+ * @return -1 when no mount point matches path, otherwise the result of
+ *         ext4_generic_open()
+ */
+int ext4_dir_open(ext4_dir *dir, const char *path)
+{
+	struct ext4_mountpoint *mp;
+	int rc;
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	rc = ext4_generic_open(&dir->f, path, "r", false, 0, 0);
+	dir->next_off = 0;
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}

+/**@brief Close a directory handle; forwards to ext4_fclose().
+ * @param dir handle opened with ext4_dir_open()
+ * @return whatever ext4_fclose() returns for dir->f
+ */
+int ext4_dir_close(ext4_dir *dir)
+{
+	ext4_file *file = &dir->f;
+	return ext4_fclose(file);
+}

+/**@brief Return the next entry of an open directory.
+ *
+ * The entry is copied into dir->de, so the returned pointer is only valid
+ * until the next call on the same handle. dir->next_off is advanced and
+ * set to EXT4_DIR_ENTRY_OFFSET_TERM once the iterator has no further
+ * entry.
+ *
+ * @param dir handle opened with ext4_dir_open()
+ * @return pointer to dir->de, or 0 when the directory is exhausted or an
+ *         error occurred while loading the entry
+ */
+const ext4_direntry *ext4_dir_entry_next(ext4_dir *dir)
+{
+#define EXT4_DIR_ENTRY_OFFSET_TERM (u64int)(-1)
+	int r;
+	u16int name_length;
+	ext4_direntry *de = 0;
+	struct ext4_inode_ref dir_inode;
+	struct ext4_dir_iter it;
+	EXT4_MP_LOCK(dir->f.mp);
+	if (dir->next_off == EXT4_DIR_ENTRY_OFFSET_TERM) {
+		EXT4_MP_UNLOCK(dir->f.mp);
+		return 0;
+	}
+	r = ext4_fs_get_inode_ref(&dir->f.mp->fs, dir->f.inode, &dir_inode);
+	if (r != 0) {
+		goto Finish;
+	}
+	r = ext4_dir_iterator_init(&it, &dir_inode, dir->next_off);
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&dir_inode);
+		goto Finish;
+	}
+	/*
+	 * The iterator can initialise successfully without a current entry
+	 * (nothing at or after next_off). Treat that as end of directory
+	 * instead of dereferencing a nil entry.
+	 */
+	if (it.curr == nil) {
+		dir->next_off = EXT4_DIR_ENTRY_OFFSET_TERM;
+		goto Release;
+	}
+	memset(dir->de.name, 0, sizeof(dir->de.name));
+	name_length = ext4_dir_en_get_name_len(&dir->f.mp->fs.sb,
+					       it.curr);
+	/* Never copy more than the destination holds, whatever the disk says. */
+	if (name_length > sizeof(dir->de.name))
+		name_length = sizeof(dir->de.name);
+	memcpy(dir->de.name, it.curr->name, name_length);
+	/* Directly copying the content isn't safe for Big-endian targets*/
+	dir->de.inode = ext4_dir_en_get_inode(it.curr);
+	dir->de.entry_length = ext4_dir_en_get_entry_len(it.curr);
+	dir->de.name_length = name_length;
+	dir->de.inode_type = ext4_dir_en_get_inode_type(&dir->f.mp->fs.sb,
+						      it.curr);
+	de = &dir->de;
+	ext4_dir_iterator_next(&it);
+	dir->next_off = it.curr ? it.curr_off : EXT4_DIR_ENTRY_OFFSET_TERM;
+Release:
+	ext4_dir_iterator_fini(&it);
+	ext4_fs_put_inode_ref(&dir_inode);
+Finish:
+	EXT4_MP_UNLOCK(dir->f.mp);
+	return de;
+}

+/**@brief Restart iteration of an open directory from its first entry.
+ * @param dir handle whose next_off is reset to 0
+ */
+void ext4_dir_entry_rewind(ext4_dir *dir)
+{
+	/* Offset 0 makes the next ext4_dir_entry_next() start over. */
+	dir->next_off = 0;
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_balloc.c

@@ -1,0 +1,617 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_trans.h"

+#include "ext4_balloc.h"

+#include "ext4_super.h"

+#include "ext4_crc32.h"

+#include "ext4_block_group.h"

+#include "ext4_fs.h"

+#include "ext4_bitmap.h"

+#include "ext4_inode.h"

+/**@brief Map an absolute block address to its block group index.
+ * @param s superblock pointer.
+ * @param baddr Absolute address of block.
+ * @return Block group index
+ */
+u32int ext4_balloc_get_bgid_of_block(struct ext4_sblock *s,
+				       u64int baddr)
+{
+	u64int rel = baddr;
+	/* With a non-zero first data block, non-zero addresses shift down by one. */
+	if (rel != 0 && ext4_get32(s, first_data_block) != 0)
+		rel--;
+	return (u32int)(rel / ext4_get32(s, blocks_per_group));
+}

+/**@brief Compute the starting block address of a block group
+ * @param s    superblock pointer.
+ * @param bgid block group index
+ * @return Block address
+ */
+u64int ext4_balloc_get_block_of_bgid(struct ext4_sblock *s,
+				       u32int bgid)
+{
+	u64int baddr = 0;
+	if (ext4_get32(s, first_data_block))
+		baddr++;
+	/*
+	 * Widen before multiplying: both factors are 32-bit, so without the
+	 * cast the product wraps at 2^32 blocks before it reaches baddr.
+	 */
+	baddr += (u64int)bgid * ext4_get32(s, blocks_per_group);
+	return baddr;
+}

+/**@brief Compute the checksum of a block bitmap.
+ * @param sb     superblock pointer
+ * @param bitmap bitmap data; blocks_per_group / 8 bytes are read
+ * @return crc32c over the file system uuid followed by the bitmap, or 0
+ *         when the METADATA_CSUM feature is not enabled
+ */
+static u32int ext4_balloc_bitmap_csum(struct ext4_sblock *sb,
+					void *bitmap)
+{
+	u32int csum;
+	u32int nbytes;
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return 0;
+	nbytes = ext4_get32(sb, blocks_per_group) / 8;
+	/* Seed with the fs uuid, then fold in the bitmap itself. */
+	csum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	csum = ext4_crc32c(csum, bitmap, nbytes);
+	return csum;
+}

+/**@brief Store the block bitmap checksum in a block group descriptor.
+ * Does nothing when the METADATA_CSUM feature is not enabled.
+ * @param sb     superblock pointer
+ * @param bg     descriptor whose checksum fields are written
+ * @param bitmap bitmap data the checksum is computed over
+ */
+void ext4_balloc_set_bitmap_csum(struct ext4_sblock *sb,
+				 struct ext4_bgroup *bg,
+				 void *bitmap)
+{
+	int desc_size;
+	u32int csum;
+	u16int lo, hi;
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return;
+	desc_size = ext4_sb_get_desc_size(sb);
+	csum = ext4_balloc_bitmap_csum(sb, bitmap);
+	lo = to_le16(csum & 0xFFFF);
+	hi = to_le16(csum >> 16);
+	bg->block_bitmap_csum_lo = lo;
+	/* The high half is written only for maximum-size descriptors. */
+	if (desc_size == EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		bg->block_bitmap_csum_hi = hi;
+}

+/**@brief Check a block bitmap against the checksum in its descriptor.
+ * @param sb     superblock pointer
+ * @param bg     descriptor holding the stored checksum
+ * @param bitmap bitmap data to verify
+ * @return true when the checksum matches or the METADATA_CSUM feature is
+ *         not enabled, false otherwise
+ */
+static bool
+ext4_balloc_verify_bitmap_csum(struct ext4_sblock *sb,
+			       struct ext4_bgroup *bg,
+			       void *bitmap)
+{
+	int desc_size;
+	u32int csum;
+	u16int lo, hi;
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+	desc_size = ext4_sb_get_desc_size(sb);
+	csum = ext4_balloc_bitmap_csum(sb, bitmap);
+	lo = to_le16(csum & 0xFFFF);
+	hi = to_le16(csum >> 16);
+	if (bg->block_bitmap_csum_lo != lo)
+		return false;
+	/* The high half is compared only for maximum-size descriptors. */
+	if (desc_size != EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return true;
+	return bg->block_bitmap_csum_hi == hi;
+}

+/**@brief Free one block and update the accounting for it.
+ *
+ * Clears the block's bit in its group's bitmap, raises the superblock and
+ * block-group free counters, lowers the inode's block count, then calls
+ * ext4_trans_try_revoke_block() and invalidates the block in the cache.
+ *
+ * @param inode_ref inode the block belonged to
+ * @param baddr     absolute address of the block
+ * @return 0 on success, otherwise the error of the failing step
+ */
+int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr)
+{
+	struct ext4_fs *fs = inode_ref->fs;
+	struct ext4_sblock *sb = &fs->sb;
+	struct ext4_block_group_ref grp;
+	struct ext4_bgroup *bg;
+	struct ext4_block bmp;
+	ext4_fsblk_t bmp_addr;
+	u32int group = ext4_balloc_get_bgid_of_block(sb, baddr);
+	u32int bit = ext4_fs_addr_to_idx_bg(sb, baddr);
+	u32int blksz;
+	u32int grp_free;
+	u64int sb_free;
+	u64int ino_blocks;
+	int rc;
+	rc = ext4_fs_get_block_group_ref(fs, group, &grp);
+	if (rc != 0)
+		return rc;
+	bg = grp.block_group;
+	/* Fetch the group's bitmap block. */
+	bmp_addr = ext4_bg_get_block_bitmap(bg, sb);
+	rc = ext4_trans_block_get(fs->bdev, &bmp, bmp_addr);
+	if (rc != 0)
+		goto Fail;
+	/* A checksum mismatch is only logged; the free still proceeds. */
+	if (!ext4_balloc_verify_bitmap_csum(sb, bg, bmp.data)) {
+		ext4_dbg(DEBUG_BALLOC,
+			DBG_WARN "Bitmap checksum failed."
+			"Group: %ud\n",
+			grp.index);
+	}
+	ext4_bmap_bit_clr(bmp.data, bit);
+	ext4_balloc_set_bitmap_csum(sb, bg, bmp.data);
+	ext4_trans_set_block_dirty(bmp.buf);
+	rc = ext4_block_set(fs->bdev, &bmp);
+	if (rc != 0)
+		goto Fail;
+	blksz = ext4_sb_get_block_size(sb);
+	/* Superblock: one more free block. */
+	sb_free = ext4_sb_get_free_blocks_cnt(sb);
+	sb_free++;
+	ext4_sb_set_free_blocks_cnt(sb, sb_free);
+	/* Inode: counted in EXT4_INODE_BLOCK_SIZE units, not fs blocks. */
+	ino_blocks = ext4_inode_get_blocks_count(sb, inode_ref->inode);
+	ino_blocks -= blksz / EXT4_INODE_BLOCK_SIZE;
+	ext4_inode_set_blocks_count(sb, inode_ref->inode, ino_blocks);
+	inode_ref->dirty = true;
+	/* Block group: one more free block. */
+	grp_free = ext4_bg_get_free_blocks_count(bg, sb);
+	grp_free++;
+	ext4_bg_set_free_blocks_count(bg, sb, grp_free);
+	grp.dirty = true;
+	rc = ext4_trans_try_revoke_block(fs->bdev, baddr);
+	if (rc != 0) {
+		/* Release the group reference without marking it dirty. */
+		grp.dirty = false;
+		goto Fail;
+	}
+	ext4_bcache_invalidate_lba(fs->bdev->bc, baddr, 1);
+	return ext4_fs_put_block_group_ref(&grp);
+Fail:
+	ext4_fs_put_block_group_ref(&grp);
+	return rc;
+}

+/**@brief Free a contiguous run of blocks and update the accounting.
+ *
+ * The run is processed one block group at a time: the matching bits are
+ * released in the group's bitmap and the superblock, inode and group
+ * counters are adjusted. Afterwards every block of the run is passed to
+ * ext4_trans_try_revoke_block() and the run is invalidated in the cache.
+ *
+ * @param inode_ref inode the blocks belonged to
+ * @param first     absolute address of the first block
+ * @param count     number of blocks in the run
+ * @return 0 on success, otherwise the error of the failing step
+ */
+int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
+			    ext4_fsblk_t first, u32int count)
+{
+	int rc = 0;
+	/* first and count are consumed by the loop; keep the original run. */
+	u32int blk_cnt = count;
+	ext4_fsblk_t start_block = first;
+	struct ext4_fs *fs = inode_ref->fs;
+	struct ext4_sblock *sb = &fs->sb;
+	/* Compute indexes */
+	u32int bg_first = ext4_balloc_get_bgid_of_block(sb, first);
+	/* Compute indexes */
+	u32int bg_last = ext4_balloc_get_bgid_of_block(sb, first + count - 1);
+	if (!ext4_sb_feature_incom(sb, EXT4_FINCOM_FLEX_BG)) {
+		/*It is not possible without flex_bg that blocks are continuous
+		 * and and last block belongs to other bg.*/
+		if (bg_last != bg_first) {
+			ext4_dbg(DEBUG_BALLOC, DBG_WARN "FLEX_BG: disabled & "
+				"bg_last: %ud bg_first: %ud\n",
+				bg_last, bg_first);
+		}
+	}
+	/* Load block group reference */
+	struct ext4_block_group_ref bg_ref;
+	while (bg_first <= bg_last) {
+		rc = ext4_fs_get_block_group_ref(fs, bg_first, &bg_ref);
+		if (rc != 0)
+			return rc;
+		struct ext4_bgroup *bg = bg_ref.block_group;
+		u32int idx_in_bg_first;
+		idx_in_bg_first = ext4_fs_addr_to_idx_bg(sb, first);
+		/* Load block with bitmap */
+		ext4_fsblk_t bitmap_blk = ext4_bg_get_block_bitmap(bg, sb);
+		struct ext4_block blk;
+		rc = ext4_trans_block_get(fs->bdev, &blk, bitmap_blk);
+		if (rc != 0) {
+			ext4_fs_put_block_group_ref(&bg_ref);
+			return rc;
+		}
+		/* A checksum mismatch is only logged. */
+		if (!ext4_balloc_verify_bitmap_csum(sb, bg, blk.data)) {
+			ext4_dbg(DEBUG_BALLOC,
+				DBG_WARN "Bitmap checksum failed."
+				"Group: %ud\n",
+				bg_ref.index);
+		}
+		/* Bits from the start index to the end of a one-block bitmap. */
+		u32int free_cnt;
+		free_cnt = ext4_sb_get_block_size(sb) * 8 - idx_in_bg_first;
+		/*If last block, free only count blocks*/
+		free_cnt = count > free_cnt ? free_cnt : count;
+		/* Modify bitmap */
+		ext4_bmap_bits_free(blk.data, idx_in_bg_first, free_cnt);
+		ext4_balloc_set_bitmap_csum(sb, bg, blk.data);
+		ext4_trans_set_block_dirty(blk.buf);
+		count -= free_cnt;
+		first += free_cnt;
+		/* Release block with bitmap */
+		rc = ext4_block_set(fs->bdev, &blk);
+		if (rc != 0) {
+			ext4_fs_put_block_group_ref(&bg_ref);
+			return rc;
+		}
+		u32int block_size = ext4_sb_get_block_size(sb);
+		/* Update superblock free blocks count */
+		u64int sb_free_blocks = ext4_sb_get_free_blocks_cnt(sb);
+		sb_free_blocks += free_cnt;
+		ext4_sb_set_free_blocks_cnt(sb, sb_free_blocks);
+		/* Update inode blocks count */
+		u64int ino_blocks;
+		ino_blocks = ext4_inode_get_blocks_count(sb, inode_ref->inode);
+		ino_blocks -= free_cnt * (block_size / EXT4_INODE_BLOCK_SIZE);
+		ext4_inode_set_blocks_count(sb, inode_ref->inode, ino_blocks);
+		inode_ref->dirty = true;
+		/* Update block group free blocks count */
+		u32int free_blocks;
+		free_blocks = ext4_bg_get_free_blocks_count(bg, sb);
+		free_blocks += free_cnt;
+		ext4_bg_set_free_blocks_count(bg, sb, free_blocks);
+		bg_ref.dirty = true;
+		/*
+		 * Release block group reference. Return the error directly:
+		 * falling through would let the revoke loop overwrite rc and
+		 * could trip the assert below while blocks are still pending.
+		 */
+		rc = ext4_fs_put_block_group_ref(&bg_ref);
+		if (rc != 0)
+			return rc;
+		bg_first++;
+	}
+	u32int i;
+	for (i = 0;i < blk_cnt;i++) {
+		rc = ext4_trans_try_revoke_block(fs->bdev, start_block + i);
+		if (rc != 0)
+			return rc;
+	}
+	ext4_bcache_invalidate_lba(fs->bdev->bc, start_block, blk_cnt);
+	/*All blocks should be released*/
+	assert(count == 0);
+	return rc;
+}

+int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref,

+			    ext4_fsblk_t goal,

+			    ext4_fsblk_t *fblock)

+{	/* Allocate one block for inode_ref, preferring the block at goal.

+	 * Search order: the goal bit itself, the bits after it up to the next

+	 * multiple of 64, whatever clear bit ext4_bmap_bit_find_clr() reports

+	 * for the goal's group, then every other group in turn, wrapping

+	 * around and ending with the goal's group again. On success the

+	 * superblock, inode and block-group counters are updated and the

+	 * address is stored in *fblock. Returns 0 on success, -1 with the

+	 * error string "no free blocks" when every group was tried, or the

+	 * error of the failing helper. */

+	ext4_fsblk_t alloc;

+	ext4_fsblk_t bmp_blk_adr;

+	u32int rel_blk_idx = 0;

+	u64int free_blocks;

+	int r;

+	struct ext4_sblock *sb = &inode_ref->fs->sb;

+	/* Load block group number for goal and relative index */

+	u32int bg_id = ext4_balloc_get_bgid_of_block(sb, goal);

+	u32int idx_in_bg = ext4_fs_addr_to_idx_bg(sb, goal);

+	struct ext4_block b;

+	struct ext4_block_group_ref bg_ref;

+	/* Load block group reference */

+	r = ext4_fs_get_block_group_ref(inode_ref->fs, bg_id, &bg_ref);

+	if (r != 0)

+		return r;

+	struct ext4_bgroup *bg = bg_ref.block_group;

+	free_blocks = ext4_bg_get_free_blocks_count(bg_ref.block_group, sb);

+	if (free_blocks == 0) {

+		/* This group has no free blocks */

+		goto goal_failed;

+	}

+	/* Compute indexes: never start the search before the first block of

+	 * the group. */

+	ext4_fsblk_t first_in_bg;

+	first_in_bg = ext4_balloc_get_block_of_bgid(sb, bg_ref.index);

+	u32int first_in_bg_index;

+	first_in_bg_index = ext4_fs_addr_to_idx_bg(sb, first_in_bg);

+	if (idx_in_bg < first_in_bg_index)

+		idx_in_bg = first_in_bg_index;

+	/* Load block with bitmap */

+	bmp_blk_adr = ext4_bg_get_block_bitmap(bg_ref.block_group, sb);

+	r = ext4_trans_block_get(inode_ref->fs->bdev, &b, bmp_blk_adr);

+	if (r != 0) {

+		ext4_fs_put_block_group_ref(&bg_ref);

+		return r;

+	}

+	if (!ext4_balloc_verify_bitmap_csum(sb, bg, b.data)) {	/* mismatch is only logged */

+		ext4_dbg(DEBUG_BALLOC,

+			DBG_WARN "Bitmap checksum failed."

+			"Group: %ud\n",

+			bg_ref.index);

+	}

+	/* Check if goal is free */

+	if (ext4_bmap_is_bit_clr(b.data, idx_in_bg)) {

+		ext4_bmap_bit_set(b.data, idx_in_bg);

+		ext4_balloc_set_bitmap_csum(sb, bg_ref.block_group,

+					    b.data);

+		ext4_trans_set_block_dirty(b.buf);

+		r = ext4_block_set(inode_ref->fs->bdev, &b);

+		if (r != 0) {

+			ext4_fs_put_block_group_ref(&bg_ref);

+			return r;

+		}

+		alloc = ext4_fs_bg_idx_to_addr(sb, idx_in_bg, bg_id);

+		goto success;

+	}

+	u32int blk_in_bg = ext4_blocks_in_group_cnt(sb, bg_id);

+	u32int end_idx = (idx_in_bg + 63) & ~63;	/* round up to a multiple of 64 */

+	if (end_idx > blk_in_bg)

+		end_idx = blk_in_bg;

+	/* Try to find free block near to goal */

+	u32int tmp_idx;

+	for (tmp_idx = idx_in_bg + 1; tmp_idx < end_idx; ++tmp_idx) {

+		if (ext4_bmap_is_bit_clr(b.data, tmp_idx)) {

+			ext4_bmap_bit_set(b.data, tmp_idx);

+			ext4_balloc_set_bitmap_csum(sb, bg, b.data);

+			ext4_trans_set_block_dirty(b.buf);

+			r = ext4_block_set(inode_ref->fs->bdev, &b);

+			if (r != 0) {

+				ext4_fs_put_block_group_ref(&bg_ref);

+				return r;

+			}

+			alloc = ext4_fs_bg_idx_to_addr(sb, tmp_idx, bg_id);

+			goto success;

+		}

+	}

+	/* Find free bit in bitmap */

+	bool no_space;	/* written by the search below, not read in this function */

+	r = ext4_bmap_bit_find_clr(b.data, idx_in_bg, blk_in_bg, &rel_blk_idx, &no_space);

+	if (r == 0) {

+		ext4_bmap_bit_set(b.data, rel_blk_idx);

+		ext4_balloc_set_bitmap_csum(sb, bg_ref.block_group, b.data);

+		ext4_trans_set_block_dirty(b.buf);

+		r = ext4_block_set(inode_ref->fs->bdev, &b);

+		if (r != 0) {

+			ext4_fs_put_block_group_ref(&bg_ref);

+			return r;

+		}

+		alloc = ext4_fs_bg_idx_to_addr(sb, rel_blk_idx, bg_id);

+		goto success;

+	}

+	/* No free block found yet: release the bitmap block unmodified */

+	r = ext4_block_set(inode_ref->fs->bdev, &b);

+	if (r != 0) {

+		ext4_fs_put_block_group_ref(&bg_ref);

+		return r;

+	}

+goal_failed:

+	r = ext4_fs_put_block_group_ref(&bg_ref);

+	if (r != 0)

+		return r;

+	/* Try other block groups: start right after the goal's group and

+	 * visit block_group_count groups, so the last one visited is the

+	 * goal's group itself. */

+	u32int block_group_count = ext4_block_group_cnt(sb);

+	u32int bgid = (bg_id + 1) % block_group_count;

+	u32int count = block_group_count;

+	while (count > 0) {

+		r = ext4_fs_get_block_group_ref(inode_ref->fs, bgid, &bg_ref);

+		if (r != 0)

+			return r;

+		struct ext4_bgroup *bg = bg_ref.block_group;	/* shadows the outer bg */

+		free_blocks = ext4_bg_get_free_blocks_count(bg, sb);

+		if (free_blocks == 0) {

+			/* This group has no free blocks */

+			goto next_group;

+		}

+		/* Load block with bitmap */

+		bmp_blk_adr = ext4_bg_get_block_bitmap(bg, sb);

+		r = ext4_trans_block_get(inode_ref->fs->bdev, &b, bmp_blk_adr);

+		if (r != 0) {

+			ext4_fs_put_block_group_ref(&bg_ref);

+			return r;

+		}

+		if (!ext4_balloc_verify_bitmap_csum(sb, bg, b.data)) {	/* mismatch is only logged */

+			ext4_dbg(DEBUG_BALLOC,

+				DBG_WARN "Bitmap checksum failed."

+				"Group: %ud\n",

+				bg_ref.index);

+		}

+		/* Compute indexes: the search starts at the group's first block */

+		first_in_bg = ext4_balloc_get_block_of_bgid(sb, bgid);

+		idx_in_bg = ext4_fs_addr_to_idx_bg(sb, first_in_bg);

+		blk_in_bg = ext4_blocks_in_group_cnt(sb, bgid);

+		first_in_bg_index = ext4_fs_addr_to_idx_bg(sb, first_in_bg);

+		if (idx_in_bg < first_in_bg_index)

+			idx_in_bg = first_in_bg_index;

+		bool no_space;	/* shadows the outer no_space; also unused here */

+		r = ext4_bmap_bit_find_clr(b.data, idx_in_bg, blk_in_bg, &rel_blk_idx, &no_space);

+		if (r == 0) {

+			ext4_bmap_bit_set(b.data, rel_blk_idx);

+			ext4_balloc_set_bitmap_csum(sb, bg, b.data);

+			ext4_trans_set_block_dirty(b.buf);

+			r = ext4_block_set(inode_ref->fs->bdev, &b);

+			if (r != 0) {

+				ext4_fs_put_block_group_ref(&bg_ref);

+				return r;

+			}

+			alloc = ext4_fs_bg_idx_to_addr(sb, rel_blk_idx, bgid);

+			goto success;

+		}

+		r = ext4_block_set(inode_ref->fs->bdev, &b);

+		if (r != 0) {

+			ext4_fs_put_block_group_ref(&bg_ref);

+			return r;

+		}

+	next_group:

+		r = ext4_fs_put_block_group_ref(&bg_ref);

+		if (r != 0) {

+			return r;

+		}

+		/* Goto next group */

+		bgid = (bgid + 1) % block_group_count;

+		count--;

+	}

+	werrstr("no free blocks");

+	return -1;

+success:

+    /* Empty command - because of syntax: a label cannot be followed

+     * directly by a declaration. bg_ref still refers to the group the

+     * block was taken from and is released at the end. */

+    ;

+	u32int block_size = ext4_sb_get_block_size(sb);

+	/* Update superblock free blocks count */

+	u64int sb_free_blocks = ext4_sb_get_free_blocks_cnt(sb);

+	sb_free_blocks--;

+	ext4_sb_set_free_blocks_cnt(sb, sb_free_blocks);

+	/* Update inode blocks (different block size!) count */

+	u64int ino_blocks = ext4_inode_get_blocks_count(sb, inode_ref->inode);

+	ino_blocks += block_size / EXT4_INODE_BLOCK_SIZE;

+	ext4_inode_set_blocks_count(sb, inode_ref->inode, ino_blocks);

+	inode_ref->dirty = true;

+	/* Update block group free blocks count */

+	u32int fb_cnt = ext4_bg_get_free_blocks_count(bg_ref.block_group, sb);

+	fb_cnt--;

+	ext4_bg_set_free_blocks_count(bg_ref.block_group, sb, fb_cnt);

+	bg_ref.dirty = true;

+	r = ext4_fs_put_block_group_ref(&bg_ref);

+	*fblock = alloc;	/* stored even when releasing the group reference failed */

+	return r;

+}

+/**@brief Allocate one specific block if it is currently free.
+ * @param inode_ref inode the block is allocated for
+ * @param baddr     absolute address of the wanted block
+ * @param free      set to whether the block's bitmap bit was clear; when
+ *                  true the block has been allocated and the superblock,
+ *                  inode and block-group counters updated
+ * @return 0 on success (whether or not the block was free), otherwise the
+ *         error of the failing step
+ */
+int ext4_balloc_try_alloc_block(struct ext4_inode_ref *inode_ref,
+				ext4_fsblk_t baddr, bool *free)
+{
+	struct ext4_fs *fs = inode_ref->fs;
+	struct ext4_sblock *sb = &fs->sb;
+	struct ext4_block_group_ref grp;
+	struct ext4_block bmp;
+	ext4_fsblk_t bmp_addr;
+	u32int group = ext4_balloc_get_bgid_of_block(sb, baddr);
+	u32int bit = ext4_fs_addr_to_idx_bg(sb, baddr);
+	int rc;
+	rc = ext4_fs_get_block_group_ref(fs, group, &grp);
+	if (rc != 0)
+		return rc;
+	/* Fetch the group's bitmap block. */
+	bmp_addr = ext4_bg_get_block_bitmap(grp.block_group, sb);
+	rc = ext4_trans_block_get(fs->bdev, &bmp, bmp_addr);
+	if (rc != 0) {
+		ext4_fs_put_block_group_ref(&grp);
+		return rc;
+	}
+	/* A checksum mismatch is only logged. */
+	if (!ext4_balloc_verify_bitmap_csum(sb, grp.block_group, bmp.data)) {
+		ext4_dbg(DEBUG_BALLOC,
+			DBG_WARN "Bitmap checksum failed."
+			"Group: %ud\n",
+			grp.index);
+	}
+	*free = ext4_bmap_is_bit_clr(bmp.data, bit);
+	if (*free) {
+		/* Claim the bit; the bitmap is dirtied only in this case. */
+		ext4_bmap_bit_set(bmp.data, bit);
+		ext4_balloc_set_bitmap_csum(sb, grp.block_group, bmp.data);
+		ext4_trans_set_block_dirty(bmp.buf);
+	}
+	rc = ext4_block_set(fs->bdev, &bmp);
+	if (rc != 0) {
+		ext4_fs_put_block_group_ref(&grp);
+		return rc;
+	}
+	if (*free) {
+		u32int blksz = ext4_sb_get_block_size(sb);
+		u64int sb_free;
+		u64int ino_blocks;
+		u32int grp_free;
+		/* Superblock: one free block fewer. */
+		sb_free = ext4_sb_get_free_blocks_cnt(sb);
+		sb_free--;
+		ext4_sb_set_free_blocks_cnt(sb, sb_free);
+		/* Inode: counted in EXT4_INODE_BLOCK_SIZE units. */
+		ino_blocks = ext4_inode_get_blocks_count(sb, inode_ref->inode);
+		ino_blocks += blksz / EXT4_INODE_BLOCK_SIZE;
+		ext4_inode_set_blocks_count(sb, inode_ref->inode, ino_blocks);
+		inode_ref->dirty = true;
+		/* Block group: one free block fewer. */
+		grp_free = ext4_bg_get_free_blocks_count(grp.block_group, sb);
+		grp_free--;
+		ext4_bg_set_free_blocks_count(grp.block_group, sb, grp_free);
+		grp.dirty = true;
+	}
+	return ext4_fs_put_block_group_ref(&grp);
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_bcache.c

@@ -1,0 +1,286 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_bcache.h"

+#include "ext4_blockdev.h"

+#include "ext4_debug.h"

+/*
+ * Comparator for the ext4_buf_lba tree: orders buffers by their LBA.
+ * Returns -1, 0 or 1 when a sorts before, equal to, or after b.
+ */
+static int ext4_bcache_lba_compare(struct ext4_buf *a, struct ext4_buf *b)
+{
+	if (a->lba < b->lba)
+		return -1;
+	return a->lba > b->lba;
+}

+/*
+ * Comparator for the ext4_buf_lru tree: orders buffers by their LRU id.
+ * Returns -1, 0 or 1 when a sorts before, equal to, or after b.
+ */
+static int ext4_bcache_lru_compare(struct ext4_buf *a, struct ext4_buf *b)
+{
+	if (a->lru_id < b->lru_id)
+		return -1;
+	return a->lru_id > b->lru_id;
+}

+RB_GENERATE_INTERNAL(ext4_buf_lba, ext4_buf, lba_node,

+		     ext4_bcache_lba_compare, static inline)

+RB_GENERATE_INTERNAL(ext4_buf_lru, ext4_buf, lru_node,

+		     ext4_bcache_lru_compare, static inline)

+/*
+ * Reset a block cache descriptor and record its geometry.
+ *   bc       - descriptor to initialise; its previous contents are wiped
+ *   cnt      - capacity stored in bc->cnt (compared against ref_blocks
+ *              by ext4_bcache_is_full)
+ *   itemsize - size in bytes of the data area allocated for each buffer
+ * All three arguments must be non-zero.  Always returns 0.
+ */
+int ext4_bcache_init_dynamic(struct ext4_bcache *bc, u32int cnt,
+			     u32int itemsize)
+{
+	assert(bc && cnt && itemsize);
+
+	/* Zero-filling also leaves ref_blocks and max_ref_blocks at 0. */
+	memset(bc, 0, sizeof(*bc));
+	bc->itemsize = itemsize;
+	bc->cnt = cnt;
+	return 0;
+}

+/*
+ * Empty the cache: every buffer in the LBA tree is handed to
+ * ext4_block_flush_buf and then dropped.  The flush result is not
+ * examined, so a buffer is dropped even if writing it back failed.
+ */
+void ext4_bcache_cleanup(struct ext4_bcache *bc)
+{
+	struct ext4_buf *cur, *next;
+
+	/* The _SAFE walk is needed because cur is freed inside the loop. */
+	RB_FOREACH_SAFE(cur, ext4_buf_lba, &bc->lba_root, next) {
+		ext4_block_flush_buf(bc->bdev, cur);
+		ext4_bcache_drop_buf(bc, cur);
+	}
+}

+/*
+ * Wipe the cache descriptor.  No buffers are released here; this only
+ * zero-fills *bc.  Always returns 0.
+ */
+int ext4_bcache_fini_dynamic(struct ext4_bcache *bc)
+{
+	memset(bc, 0, sizeof(*bc));
+	return 0;
+}

+/**@brief:

+ *

+ *  This is ext4_bcache, the module handling basic buffer-cache stuff.

+ *

+ *  Buffers in a bcache are sorted by their LBA and stored in a

+ *  RB-Tree(lba_root).

+ *

+ *  Bcache also maintains another RB-Tree(lru_root) right now, where

+ *  buffers are sorted by their LRU id.

+ *

+ *  A singly-linked list is used to track those dirty buffers which are

+ *  ready to be flushed. (Those buffers which are dirty but also referenced

+ *  are not considered ready to be flushed.)

+ *

+ *  When a buffer is not referenced, it will be stored in both lba_root

+ *  and lru_root, while it will only be stored in lba_root when it is

+ *  referenced.

+ */

+/*
+ * Allocate a buffer descriptor plus a data area of bc->itemsize bytes
+ * for logical block lba.  The descriptor is zero-filled apart from its
+ * lba, data and bc fields; the data area is left uninitialised.
+ * Returns nil when either allocation fails.
+ */
+static struct ext4_buf *
+ext4_buf_alloc(struct ext4_bcache *bc, u64int lba)
+{
+	struct ext4_buf *nb;
+	void *payload;
+
+	payload = ext4_malloc(bc->itemsize);
+	if (payload == nil)
+		return nil;
+
+	nb = ext4_calloc(1, sizeof(*nb));
+	if (nb == nil) {
+		/* Do not leak the data area when the descriptor fails. */
+		ext4_free(payload);
+		return nil;
+	}
+
+	nb->bc = bc;
+	nb->data = payload;
+	nb->lba = lba;
+	return nb;
+}

+/* Release a buffer: first its data area, then the descriptor itself. */
+static void ext4_buf_free(struct ext4_buf *buf)
+{
+	void *payload = buf->data;
+
+	ext4_free(payload);
+	ext4_free(buf);
+}

+/*
+ * Find the cached buffer whose LBA equals lba exactly.
+ * Returns the RB_FIND result on the LBA tree (no match yields a null
+ * pointer, which callers test for).
+ */
+static struct ext4_buf *
+ext4_buf_lookup(struct ext4_bcache *bc, u64int lba)
+{
+	struct ext4_buf key;
+
+	/* Only the lba field is examined by the tree comparator. */
+	memset(&key, 0, sizeof(key));
+	key.lba = lba;
+	return RB_FIND(ext4_buf_lba, &bc->lba_root, &key);
+}

+/*
+ * Return the buffer with the smallest LRU id in the LRU tree, i.e. the
+ * RB_MIN of lru_root (callers assert the result is non-null).
+ */
+struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc)
+{
+	struct ext4_buf *oldest;
+
+	oldest = RB_MIN(ext4_buf_lru, &bc->lru_root);
+	return oldest;
+}

+/*
+ * Remove buf from the cache and free it, without writing it back.
+ * A buffer that is still referenced is dropped as well, but a warning
+ * is logged; such a buffer is not removed from the LRU tree.
+ */
+void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf)
+{
+	if (!buf->refctr) {
+		/* Unreferenced buffer: take it off the LRU tree too. */
+		RB_REMOVE(ext4_buf_lru, &bc->lru_root, buf);
+	} else {
+		/* Warn on dropping any referenced buffers. */
+		ext4_dbg(DEBUG_BCACHE, DBG_WARN "Buffer is still referenced. "
+				"lba: %llud, refctr: %ud\n",
+				buf->lba, buf->refctr);
+	}
+
+	RB_REMOVE(ext4_buf_lba, &bc->lba_root, buf);
+
+	/* A dirty buffer is forcibly discarded along with its dirty-list entry. */
+	if (ext4_bcache_test_flag(buf, BC_DIRTY))
+		ext4_bcache_remove_dirty_node(bc, buf);
+
+	ext4_buf_free(buf);
+	bc->ref_blocks--;
+}

+/*
+ * Invalidate one buffer: forget its write-completion callback, take it
+ * off the dirty list if it is dirty, and clear its dirty state.
+ */
+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
+				struct ext4_buf *buf)
+{
+	/* No completion callback must fire for discarded data. */
+	buf->end_write = nil;
+	buf->end_write_arg = nil;
+
+	if (ext4_bcache_test_flag(buf, BC_DIRTY)) {
+		ext4_bcache_remove_dirty_node(bc, buf);
+	}
+
+	/*
+	 * NOTE(review): the original comment says this clears both the
+	 * dirty and the up-to-date flag; ext4_bcache_clear_dirty is
+	 * defined outside this file - confirm it really clears both.
+	 */
+	ext4_bcache_clear_dirty(buf);
+}

+/*
+ * Invalidate the cached buffers for cnt consecutive logical blocks
+ * starting at from (inclusive range from .. from + cnt - 1).
+ *
+ * NOTE(review): the walk starts at the buffer whose LBA is exactly
+ * `from`.  When no such buffer is cached the lookup yields nothing and
+ * the loop body never runs, even if later blocks of the range are
+ * cached - confirm whether a lower-bound search is intended here.
+ */
+void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,
+				u64int from,
+				u32int cnt)
+{
+	struct ext4_buf *start, *cur;
+	u64int last;
+
+	last = from + cnt - 1;
+	start = ext4_buf_lookup(bc, from);
+
+	RB_FOREACH_FROM(cur, ext4_buf_lba, start) {
+		/* The tree is ordered by LBA, so the range ends here. */
+		if (cur->lba > last)
+			break;
+
+		ext4_bcache_invalidate_buf(bc, cur);
+	}
+}

+/*
+ * Look up the buffer for lba and, if it is cached, take a reference.
+ *   b - filled in with lb_id, buf and data on a hit; untouched on a miss
+ * Returns the buffer, or nil when lba is not cached.
+ */
+struct ext4_buf *
+ext4_bcache_find_get(struct ext4_bcache *bc, struct ext4_block *b,
+		     u64int lba)
+{
+	struct ext4_buf *hit;
+
+	hit = ext4_buf_lookup(bc, lba);
+	if (!hit)
+		return hit;
+
+	if (!hit->refctr) {
+		/*
+		 * First reference: stamp a fresh LRU id and take the
+		 * buffer off the LRU tree (and off the dirty list), since
+		 * referenced buffers live only in the LBA tree.
+		 */
+		hit->lru_id = ++bc->lru_ctr;
+		RB_REMOVE(ext4_buf_lru, &bc->lru_root, hit);
+		if (ext4_bcache_test_flag(hit, BC_DIRTY)) {
+			ext4_bcache_remove_dirty_node(bc, hit);
+		}
+	}
+
+	ext4_bcache_inc_ref(hit);
+	b->lb_id = lba;
+	b->buf = hit;
+	b->data = hit->data;
+	return hit;
+}

+/*
+ * Obtain a referenced buffer for block b->lb_id, creating one when the
+ * cache holds none.
+ *   b      - b->lb_id selects the block; b->buf and b->data are set on success
+ *   is_new - set to true when a buffer had to be allocated, false on a hit
+ * Returns 0 on success, or -1 with the error string "memory" when the
+ * buffer cannot be allocated.
+ */
+int ext4_bcache_alloc(struct ext4_bcache *bc, struct ext4_block *b,
+		      bool *is_new)
+{
+	struct ext4_buf *nb;
+
+	/* Fast path: a buffer with this exact LBA is already cached. */
+	if (ext4_bcache_find_get(bc, b, b->lb_id)) {
+		*is_new = false;
+		return 0;
+	}
+
+	/* Miss: a new buffer has to be allocated. */
+	nb = ext4_buf_alloc(bc, b->lb_id);
+	if (!nb) {
+		werrstr("memory");
+		return -1;
+	}
+
+	RB_INSERT(ext4_buf_lba, &bc->lba_root, nb);
+
+	/* Count the new buffer and keep the high-water mark current. */
+	bc->ref_blocks++;
+	if (bc->ref_blocks > bc->max_ref_blocks)
+		bc->max_ref_blocks = bc->ref_blocks;
+
+	ext4_bcache_inc_ref(nb);
+	/* Stamp the buffer with the next LRU id. */
+	nb->lru_id = ++bc->lru_ctr;
+
+	b->buf = nb;
+	b->data = nb->data;
+	*is_new = true;
+	return 0;
+}

+/*
+ * Drop one reference to the buffer behind block handle b.
+ *
+ * When the last reference goes away the buffer is put on the LRU tree.
+ * If it is dirty and up to date it is then either queued on the dirty
+ * list (write-back mode, unless flagged BC_FLUSH or BC_TMP) or written
+ * out at once.  A buffer that is not up to date, or is flagged BC_TMP,
+ * is dropped from the cache.
+ *
+ * On success b->lb_id and b->data are cleared and 0 is returned.
+ * Returns -1 with the error string "invalid block id" when b->lb_id is 0.
+ */
+int ext4_bcache_free(struct ext4_bcache *bc, struct ext4_block *b)
+{
+	struct ext4_buf *buf;
+
+	/* Check the arguments before b is dereferenced. */
+	assert(bc && b);
+	buf = b->buf;
+
+	/* Check if valid. */
+	if (!b->lb_id) {
+		werrstr("invalid block id");
+		return -1;
+	}
+
+	/* The block must carry a valid pointer to its ext4_buf. */
+	assert(buf);
+
+	/* Nobody may free a block that holds no reference. */
+	assert(buf->refctr);
+
+	/* Just decrease the reference counter. */
+	ext4_bcache_dec_ref(buf);
+
+	/* We are the last one touching this buffer, do the cleanups. */
+	if (!buf->refctr) {
+		RB_INSERT(ext4_buf_lru, &bc->lru_root, buf);
+		/* This buffer is ready to be flushed. */
+		if (ext4_bcache_test_flag(buf, BC_DIRTY) &&
+		    ext4_bcache_test_flag(buf, BC_UPTODATE)) {
+			if (bc->bdev->cache_write_back &&
+			    !ext4_bcache_test_flag(buf, BC_FLUSH) &&
+			    !ext4_bcache_test_flag(buf, BC_TMP))
+				ext4_bcache_insert_dirty_node(bc, buf);
+			else {
+				ext4_block_flush_buf(bc->bdev, buf);
+				ext4_bcache_clear_flag(buf, BC_FLUSH);
+			}
+		}
+
+		/* The buffer is invalidated (or temporary): drop it. */
+		if (!ext4_bcache_test_flag(buf, BC_UPTODATE) ||
+		    ext4_bcache_test_flag(buf, BC_TMP))
+			ext4_bcache_drop_buf(bc, buf);
+	}
+
+	b->lb_id = 0;
+	b->data = 0;
+	return 0;
+}

+/* True when the number of cached buffers has reached the capacity bc->cnt. */
+bool ext4_bcache_is_full(struct ext4_bcache *bc)
+{
+	if (bc->cnt <= bc->ref_blocks)
+		return true;
+	return false;
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_bitmap.c

@@ -1,0 +1,84 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_bitmap.h"

+/*
+ * Clear bcnt consecutive bits in bitmap bmap, starting at bit sbit.
+ *
+ * The range is handled in three parts: single bits up to the next byte
+ * boundary, whole bytes zeroed with memset, and the remaining (fewer
+ * than eight) bits of the last byte.
+ */
+void ext4_bmap_bits_free(u8int *bmap, u32int sbit, u32int bcnt)
+{
+	u32int i = sbit;
+
+	/* Head: clear bit by bit until i is byte aligned. */
+	while (i & 7) {
+		if (!bcnt)
+			return;
+		ext4_bmap_bit_clr(bmap, i);
+		bcnt--;
+		i++;
+	}
+
+	/* Body: zero all whole bytes in one go. */
+	bmap += i >> 3;
+	memset(bmap, 0, bcnt >> 3);
+	bmap += bcnt >> 3;
+
+	/*
+	 * Tail: only the bits left over after the whole bytes remain.
+	 * Without this reduction the loop below would clear bcnt further
+	 * bits and free blocks beyond the requested range.
+	 */
+	bcnt &= 7;
+	for (i = 0; i < bcnt; ++i) {
+		ext4_bmap_bit_clr(bmap, i);
+	}
+}

+/*
+ * Find the first clear bit of bitmap bmap in the range [sbit, ebit).
+ *   bit_id   - receives the index of the clear bit on success
+ *   no_space - set to true when the range holds no clear bit, else false
+ * Returns 0 when a clear bit was found, -1 otherwise.
+ */
+int ext4_bmap_bit_find_clr(u8int *bmap, u32int sbit, u32int ebit,
+			   u32int *bit_id, bool *no_space)
+{
+	u32int i;
+	u32int bcnt = ebit - sbit;
+
+	*no_space = false;
+
+	/* Head: probe single bits until a byte boundary is reached. */
+	for (i = sbit; i & 7; i++, bcnt--) {
+		if (!bcnt)
+			goto Nospace;
+		if (ext4_bmap_is_bit_clr(bmap, i)) {
+			/* Report the bit just tested, not the range start. */
+			*bit_id = i;
+			return 0;
+		}
+	}
+
+	/* From here on bmap points at the byte holding bit sbit. */
+	sbit = i;
+	bmap += (sbit >> 3);
+
+	/* Body: skip bytes that are fully set, scan the first that is not. */
+	while (bcnt >= 8) {
+		if (*bmap != 0xFF) {
+			for (i = 0; i < 8; ++i) {
+				if (ext4_bmap_is_bit_clr(bmap, i)) {
+					*bit_id = sbit + i;
+					return 0;
+				}
+			}
+		}
+		bmap += 1;
+		bcnt -= 8;
+		sbit += 8;
+	}
+
+	/* Tail: fewer than eight bits remain in the last byte. */
+	for (i = 0; i < bcnt; ++i) {
+		if (ext4_bmap_is_bit_clr(bmap, i)) {
+			*bit_id = sbit + i;
+			return 0;
+		}
+	}
+
+Nospace:
+	*no_space = true;
+	return -1;
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_block_group.c

@@ -1,0 +1,47 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_block_group.h"

+/**@brief CRC-16 look up table*/

+static u16int const crc16_tab[256] = {

+    0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, 0xC601,

+    0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, 0xCC01, 0x0CC0,

+    0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, 0x0A00, 0xCAC1, 0xCB81,

+    0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, 0xD801, 0x18C0, 0x1980, 0xD941,

+    0x1B00, 0xDBC1, 0xDA81, 0x1A40, 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01,

+    0x1DC0, 0x1C80, 0xDC41, 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0,

+    0x1680, 0xD641, 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081,

+    0x1040, 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,

+    0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, 0x3C00,

+    0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, 0xFA01, 0x3AC0,

+    0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, 0x2800, 0xE8C1, 0xE981,

+    0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, 0xEE01, 0x2EC0, 0x2F80, 0xEF41,

+    0x2D00, 0xEDC1, 0xEC81, 0x2C40, 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700,

+    0xE7C1, 0xE681, 0x2640, 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0,

+    0x2080, 0xE041, 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281,

+    0x6240, 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,

+    0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, 0xAA01,

+    0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, 0x7800, 0xB8C1,

+    0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, 0xBE01, 0x7EC0, 0x7F80,

+    0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, 0xB401, 0x74C0, 0x7580, 0xB541,

+    0x7700, 0xB7C1, 0xB681, 0x7640, 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101,

+    0x71C0, 0x7080, 0xB041, 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0,

+    0x5280, 0x9241, 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481,

+    0x5440, 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,

+    0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, 0x8801,

+    0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, 0x4E00, 0x8EC1,

+    0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, 0x4400, 0x84C1, 0x8581,

+    0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, 0x8201, 0x42C0, 0x4380, 0x8341,

+    0x4100, 0x81C1, 0x8081, 0x4040};

+/*
+ * Table-driven CRC-16 over len bytes of buffer, continuing from the
+ * running value crc.  Returns the updated CRC.
+ */
+u16int ext4_bg_crc16(u16int crc, const u8int *buffer, usize len)
+{
+	u8int idx;
+
+	for (; len != 0; len--, buffer++) {
+		/* The low byte of crc mixed with the input selects the entry. */
+		idx = (crc ^ *buffer) & 0xffU;
+		crc = (((crc >> 8) & 0xffU) ^ crc16_tab[idx]) & 0x0000ffffU;
+	}
+	return crc;
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_blockdev.c

@@ -1,0 +1,443 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_blockdev.h"

+#include "ext4_fs.h"

+#include "ext4_journal.h"

+static char Eoorop[] = "out of range operation";

+/*
+ * Take the block device interface lock, if the interface provides a
+ * lock callback.  A failing callback trips an assertion.
+ */
+static void ext4_bdif_lock(struct ext4_blockdev *bdev)
+{
+	int rc;
+
+	if (bdev->bdif->lock) {
+		rc = bdev->bdif->lock(bdev);
+		assert(rc == 0);
+	}
+}

+/*
+ * Release the block device interface lock, if the interface provides
+ * an unlock callback.  A failing callback trips an assertion.
+ */
+static void ext4_bdif_unlock(struct ext4_blockdev *bdev)
+{
+	int rc;
+
+	if (bdev->bdif->unlock) {
+		rc = bdev->bdif->unlock(bdev);
+		assert(rc == 0);
+	}
+}

+/*
+ * Read blk_cnt physical blocks starting at blk_id into buf through the
+ * interface's bread callback, under the interface lock.  The read
+ * counter is bumped whether or not the read succeeds.
+ * Returns the callback's result.
+ */
+static int ext4_bdif_bread(struct ext4_blockdev *bdev, void *buf,
+			   u64int blk_id, u32int blk_cnt)
+{
+	int rc;
+
+	ext4_bdif_lock(bdev);
+	rc = bdev->bdif->bread(bdev, buf, blk_id, blk_cnt);
+	bdev->bdif->bread_ctr++;
+	ext4_bdif_unlock(bdev);
+
+	return rc;
+}

+/*
+ * Write blk_cnt physical blocks starting at blk_id from buf through
+ * the interface's bwrite callback, under the interface lock.  The
+ * write counter is bumped whether or not the write succeeds.
+ * Returns the callback's result.
+ */
+static int ext4_bdif_bwrite(struct ext4_blockdev *bdev, const void *buf,
+			    u64int blk_id, u32int blk_cnt)
+{
+	int rc;
+
+	ext4_bdif_lock(bdev);
+	rc = bdev->bdif->bwrite(bdev, buf, blk_id, blk_cnt);
+	bdev->bdif->bwrite_ctr++;
+	ext4_bdif_unlock(bdev);
+
+	return rc;
+}

+/*
+ * Open the block device, or just take another reference when its
+ * interface is already open.  The interface must supply open, close,
+ * bread and bwrite callbacks.
+ * Returns 0 on success, otherwise the result of the open callback.
+ */
+int ext4_block_init(struct ext4_blockdev *bdev)
+{
+	int rc;
+
+	assert(bdev);
+	assert(bdev->bdif);
+	assert(bdev->bdif->open &&
+		   bdev->bdif->close &&
+		   bdev->bdif->bread &&
+		   bdev->bdif->bwrite);
+
+	if (!bdev->bdif->ph_refctr) {
+		/* First user: do the low level open. */
+		rc = bdev->bdif->open(bdev);
+		if (rc != 0)
+			return rc;
+		bdev->bdif->ph_refctr = 1;
+	} else {
+		/* Already open: only count the additional user. */
+		bdev->bdif->ph_refctr++;
+	}
+	return 0;
+}

+/*
+ * Link a block device and a block cache to each other
+ * (bdev->bc and bc->bdev).  Always returns 0.
+ */
+int ext4_block_bind_bcache(struct ext4_blockdev *bdev, struct ext4_bcache *bc)
+{
+	assert(bdev && bc);
+
+	bc->bdev = bdev;
+	bdev->bc = bc;
+	return 0;
+}

+/*
+ * Set the logical block size and derive the logical block count from
+ * the partition size.  lb_bsize has to be a multiple of the physical
+ * block size; this is asserted.
+ */
+void ext4_block_set_lb_size(struct ext4_blockdev *bdev, u32int lb_bsize)
+{
+	assert((lb_bsize % bdev->bdif->ph_bsize) == 0);
+
+	bdev->lg_bcnt = bdev->part_size / lb_bsize;
+	bdev->lg_bsize = lb_bsize;
+}

+/*
+ * Drop one reference to the block device interface and close it when
+ * the last reference is gone.
+ * Returns 0 when nothing had to be closed (never opened, or other
+ * users remain), otherwise the result of the close callback.
+ */
+int ext4_block_fini(struct ext4_blockdev *bdev)
+{
+	assert(bdev);
+
+	/* Not open at all: nothing to do. */
+	if (bdev->bdif->ph_refctr == 0)
+		return 0;
+
+	/* Other users remain after this one leaves. */
+	if (--bdev->bdif->ph_refctr != 0)
+		return 0;
+
+	/* Last user gone: do the low level close. */
+	return bdev->bdif->close(bdev);
+}

+/*
+ * Write one cached buffer back to the device if it is both dirty and
+ * up to date; otherwise do nothing.
+ *
+ * After a successful write the buffer leaves the dirty list and its
+ * BC_DIRTY flag is cleared.  The buffer's end_write callback, if any,
+ * is invoked with the write result in both the success and the failure
+ * case; bc->dont_shake is held true for the duration of the callback.
+ * Returns 0 on success or when nothing had to be written, otherwise
+ * the write error.
+ */
+int ext4_block_flush_buf(struct ext4_blockdev *bdev, struct ext4_buf *buf)
+{
+	struct ext4_bcache *bc = bdev->bc;
+	int rc;
+
+	if (!ext4_bcache_test_flag(buf, BC_DIRTY) ||
+	    !ext4_bcache_test_flag(buf, BC_UPTODATE))
+		return 0;
+
+	rc = ext4_blocks_set_direct(bdev, buf->data, buf->lba, 1);
+	if (rc == 0) {
+		/* The data reached the device: the buffer is clean now. */
+		ext4_bcache_remove_dirty_node(bc, buf);
+		ext4_bcache_clear_flag(buf, BC_DIRTY);
+	}
+
+	if (buf->end_write) {
+		bc->dont_shake = true;
+		buf->end_write(bc, buf, rc, buf->end_write_arg);
+		bc->dont_shake = false;
+	}
+	return rc;
+}

+/*
+ * Flush the cached buffer of logical block lba, if that block is
+ * cached at all.  Returns 0 when the block is not cached, otherwise
+ * the result of ext4_block_flush_buf.
+ */
+int ext4_block_flush_lba(struct ext4_blockdev *bdev, u64int lba)
+{
+	struct ext4_block blk;
+	struct ext4_buf *cached;
+	int rc;
+
+	cached = ext4_bcache_find_get(bdev->bc, &blk, lba);
+	if (!cached)
+		return 0;
+
+	rc = ext4_block_flush_buf(bdev, cached);
+	/* Give back the reference taken by the lookup above. */
+	ext4_bcache_free(bdev->bc, &blk);
+	return rc;
+}

+/*
+ * Make room in a full cache: while the cache is full and unreferenced
+ * buffers exist, flush (if dirty) and drop the buffer with the lowest
+ * LRU id.  Does nothing when bc->dont_shake is set; the flag is held
+ * true while the loop runs.
+ * Returns 0, or the error of the flush that stopped the loop.
+ */
+int ext4_block_cache_shake(struct ext4_blockdev *bdev)
+{
+	struct ext4_buf *victim;
+	int rc = 0;
+
+	if (bdev->bc->dont_shake)
+		return 0;
+
+	bdev->bc->dont_shake = true;
+	for (;;) {
+		if (RB_EMPTY(&bdev->bc->lru_root) ||
+		    !ext4_bcache_is_full(bdev->bc))
+			break;
+
+		victim = ext4_buf_lowest_lru(bdev->bc);
+		assert(victim);
+
+		/* A dirty victim must reach the device before it is dropped. */
+		if (ext4_bcache_test_flag(victim, BC_DIRTY)) {
+			rc = ext4_block_flush_buf(bdev, victim);
+			if (rc != 0)
+				break;
+		}
+		ext4_bcache_drop_buf(bdev->bc, victim);
+	}
+	bdev->bc->dont_shake = false;
+
+	return rc;
+}

+/*
+ * Get a referenced cache buffer for logical block lba without reading
+ * the block from the device.
+ *   b - filled in with lb_id, buf and data
+ * Returns 0 on success.  Fails with -1 and Eio when the device is not
+ * open or lba lies outside the device; other errors come from shaking
+ * the cache or from the buffer allocation.
+ */
+int ext4_block_get_noread(struct ext4_blockdev *bdev, struct ext4_block *b,
+			  u64int lba)
+{
+	bool fresh;
+	int rc;
+
+	assert(bdev && b);
+
+	if (!bdev->bdif->ph_refctr || lba >= bdev->lg_bcnt) {
+		werrstr(Eio);
+		return -1;
+	}
+
+	b->lb_id = lba;
+
+	/* A full cache has to give up (flush and drop) buffers first. */
+	rc = ext4_block_cache_shake(bdev);
+	if (rc == 0)
+		rc = ext4_bcache_alloc(bdev->bc, b, &fresh);
+	if (rc != 0)
+		return rc;
+
+	if (!b->data) {
+		werrstr("memory");
+		return -1;
+	}
+	return 0;
+}

+/*
+ * Get a referenced cache buffer for logical block lba and make sure it
+ * holds the block's contents, reading from the device when the cached
+ * copy is not up to date.
+ * Returns 0 on success.  When the read fails the reference is given
+ * back, b->lb_id is cleared and the read error is returned.
+ */
+int ext4_block_get(struct ext4_blockdev *bdev, struct ext4_block *b,
+		   u64int lba)
+{
+	int rc;
+
+	rc = ext4_block_get_noread(bdev, b, lba);
+	if (rc != 0)
+		return rc;
+
+	/* Up-to-date cached data needs no access to the device. */
+	if (!ext4_bcache_test_flag(b->buf, BC_UPTODATE)) {
+		rc = ext4_blocks_get_direct(bdev, b->data, lba, 1);
+		if (rc != 0) {
+			ext4_bcache_free(bdev->bc, b);
+			b->lb_id = 0;
+			return rc;
+		}
+
+		/* Fresh data was just read: mark the buffer up to date. */
+		ext4_bcache_set_flag(b->buf, BC_UPTODATE);
+	}
+	return 0;
+}

+/*
+ * Release a block handle obtained with ext4_block_get or
+ * ext4_block_get_noread.
+ * Returns the result of ext4_bcache_free, or -1 with Eio when the
+ * device is not open.
+ */
+int ext4_block_set(struct ext4_blockdev *bdev, struct ext4_block *b)
+{
+	assert(bdev && b);
+	assert(b->buf);
+
+	if (bdev->bdif->ph_refctr)
+		return ext4_bcache_free(bdev->bc, b);
+
+	werrstr(Eio);
+	return -1;
+}

+/*
+ * Read cnt logical blocks starting at lba straight from the device
+ * into buf, bypassing the cache.  The logical address is converted to
+ * a physical block number, taking the partition offset into account.
+ * Returns the result of the device read.
+ */
+int ext4_blocks_get_direct(struct ext4_blockdev *bdev, void *buf, u64int lba,
+			   u32int cnt)
+{
+	u32int ph_per_lg;
+	u64int first_pb;
+
+	assert(bdev && buf);
+
+	/* Number of physical blocks that make up one logical block. */
+	ph_per_lg = bdev->lg_bsize / bdev->bdif->ph_bsize;
+	first_pb = (lba * bdev->lg_bsize + bdev->part_offset) / bdev->bdif->ph_bsize;
+
+	return ext4_bdif_bread(bdev, buf, first_pb, ph_per_lg * cnt);
+}

+/*
+ * Write cnt logical blocks starting at lba straight from buf to the
+ * device, bypassing the cache.  The logical address is converted to a
+ * physical block number, taking the partition offset into account.
+ * Returns the result of the device write.
+ */
+int ext4_blocks_set_direct(struct ext4_blockdev *bdev, const void *buf,
+			   u64int lba, u32int cnt)
+{
+	u32int ph_per_lg;
+	u64int first_pb;
+
+	assert(bdev && buf);
+
+	/* Number of physical blocks that make up one logical block. */
+	ph_per_lg = bdev->lg_bsize / bdev->bdif->ph_bsize;
+	first_pb = (lba * bdev->lg_bsize + bdev->part_offset) / bdev->bdif->ph_bsize;
+
+	return ext4_bdif_bwrite(bdev, buf, first_pb, ph_per_lg * cnt);
+}

+/*
+ * Write len bytes from buf to byte offset off of the partition,
+ * bypassing the cache.
+ *
+ * The range is split into an unaligned head, a run of whole physical
+ * blocks and a partial tail.  Head and tail are done as
+ * read-modify-write through the interface buffer ph_bbuf.  The
+ * alignment test masks off with (ph_bsize - 1).
+ * Returns 0 on success, -1 with Eio when the device is not open, -1
+ * with Eoorop when the range exceeds the partition, or the error of
+ * the failing device access.
+ */
+int ext4_block_writebytes(struct ext4_blockdev *bdev, u64int off,
+			  const void *buf, u32int len)
+{
+	const u8int *src = buf;
+	u64int pblk;
+	u32int head_off;
+	u32int chunk;
+	u32int nblk;
+	int rc;
+
+	assert(bdev && buf);
+
+	if (!bdev->bdif->ph_refctr) {
+		werrstr(Eio);
+		return -1;
+	}
+	if (off + len > bdev->part_size) {
+		werrstr(Eoorop);
+		return -1;
+	}
+
+	pblk = (off + bdev->part_offset) / bdev->bdif->ph_bsize;
+
+	/* Head: the write starts in the middle of a physical block. */
+	head_off = off & (bdev->bdif->ph_bsize - 1);
+	if (head_off != 0) {
+		chunk = bdev->bdif->ph_bsize - head_off;
+		if (chunk > len)
+			chunk = len;
+
+		rc = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+		if (rc != 0)
+			return rc;
+		memcpy(bdev->bdif->ph_bbuf + head_off, src, chunk);
+		rc = ext4_bdif_bwrite(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+		if (rc != 0)
+			return rc;
+
+		src += chunk;
+		len -= chunk;
+		pblk++;
+	}
+
+	/* Body: whole physical blocks go out directly from the caller's buffer. */
+	nblk = len / bdev->bdif->ph_bsize;
+	if (nblk != 0) {
+		rc = ext4_bdif_bwrite(bdev, src, pblk, nblk);
+		if (rc != 0)
+			return rc;
+
+		src += bdev->bdif->ph_bsize * nblk;
+		len -= bdev->bdif->ph_bsize * nblk;
+		pblk += nblk;
+	}
+
+	/* Tail: the write ends in the middle of a physical block. */
+	if (len != 0) {
+		rc = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+		if (rc != 0)
+			return rc;
+		memcpy(bdev->bdif->ph_bbuf, src, len);
+		rc = ext4_bdif_bwrite(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+		if (rc != 0)
+			return rc;
+	}
+
+	return 0;
+}

+/*
+ * Read len bytes from byte offset off of the partition into buf,
+ * bypassing the cache.
+ *
+ * The range is split into an unaligned head, a run of whole physical
+ * blocks and a partial tail.  Head and tail are read into the
+ * interface buffer ph_bbuf and copied out from there.  The alignment
+ * test masks off with (ph_bsize - 1).
+ * Returns 0 on success, -1 with Eio when the device is not open, -1
+ * with Eoorop when the range exceeds the partition, or the error of
+ * the failing device read.
+ */
+int ext4_block_readbytes(struct ext4_blockdev *bdev, u64int off, void *buf,
+			 u32int len)
+{
+	u8int *dst = buf;
+	u64int pblk;
+	u32int head_off;
+	u32int chunk;
+	u32int nblk;
+	int rc;
+
+	assert(bdev && buf);
+
+	if (!bdev->bdif->ph_refctr) {
+		werrstr(Eio);
+		return -1;
+	}
+	if (off + len > bdev->part_size) {
+		werrstr(Eoorop);
+		return -1;
+	}
+
+	pblk = (off + bdev->part_offset) / bdev->bdif->ph_bsize;
+
+	/* Head: the read starts in the middle of a physical block. */
+	head_off = off & (bdev->bdif->ph_bsize - 1);
+	if (head_off != 0) {
+		chunk = bdev->bdif->ph_bsize - head_off;
+		if (chunk > len)
+			chunk = len;
+
+		rc = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+		if (rc != 0)
+			return rc;
+		memcpy(dst, bdev->bdif->ph_bbuf + head_off, chunk);
+
+		dst += chunk;
+		len -= chunk;
+		pblk++;
+	}
+
+	/* Body: whole physical blocks land directly in the caller's buffer. */
+	nblk = len / bdev->bdif->ph_bsize;
+	if (nblk != 0) {
+		rc = ext4_bdif_bread(bdev, dst, pblk, nblk);
+		if (rc != 0)
+			return rc;
+
+		dst += bdev->bdif->ph_bsize * nblk;
+		len -= bdev->bdif->ph_bsize * nblk;
+		pblk += nblk;
+	}
+
+	/* Tail: the read ends in the middle of a physical block. */
+	if (len != 0) {
+		rc = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+		if (rc != 0)
+			return rc;
+		memcpy(dst, bdev->bdif->ph_bbuf, len);
+	}
+
+	return 0;
+}

+/*
+ * Write back every buffer on the cache's dirty list.
+ * Returns 0 once the list is empty, or the first flush error.
+ *
+ * NOTE(review): the loop advances only because ext4_block_flush_buf
+ * takes a written buffer off the dirty list.  For a listed buffer that
+ * is not both BC_DIRTY and BC_UPTODATE, flush_buf returns 0 without
+ * touching the list and this loop would not terminate - confirm that
+ * such a buffer can never be on the list.
+ */
+int ext4_block_cache_flush(struct ext4_blockdev *bdev)
+{
+	struct ext4_buf *head;
+	int rc;
+
+	for (;;) {
+		if (SLIST_EMPTY(&bdev->bc->dirty_list))
+			return 0;
+
+		head = SLIST_FIRST(&bdev->bc->dirty_list);
+		assert(head);
+
+		rc = ext4_block_flush_buf(bdev, head);
+		if (rc != 0)
+			return rc;
+	}
+}

+/*
+ * Enable (on_off != 0) or disable (on_off == 0) write-back caching.
+ * The setting is a nesting counter: each enable increments it and each
+ * disable decrements it, never below zero.  When the counter is zero
+ * after the update, all delayed dirty buffers are flushed.
+ * Returns 0 while write-back stays enabled, otherwise the result of
+ * the flush.
+ */
+int ext4_block_cache_write_back(struct ext4_blockdev *bdev, u8int on_off)
+{
+	if (on_off) {
+		bdev->cache_write_back++;
+	} else if (bdev->cache_write_back) {
+		bdev->cache_write_back--;
+	}
+
+	if (!bdev->cache_write_back) {
+		/* Flush data in all delayed cache blocks. */
+		return ext4_block_cache_flush(bdev);
+	}
+	return 0;
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_crc32.c

@@ -1,0 +1,144 @@

+/* Based on FreeBSD. */

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_crc32.h"

+static const u32int crc32_tab[] = {

+	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,

+	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,

+	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,

+	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,

+	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,

+	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,

+	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,

+	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,

+	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,

+	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,

+	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,

+	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,

+	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,

+	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,

+	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,

+	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,

+	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,

+	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,

+	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,

+	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,

+	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,

+	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,

+	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,

+	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,

+	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,

+	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,

+	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,

+	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,

+	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,

+	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,

+	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,

+	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,

+	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,

+	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,

+	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,

+	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,

+	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,

+	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,

+	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,

+	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,

+	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,

+	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,

+	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d

+};

+/* */

+/* CRC LOOKUP TABLE */

+/* ================ */

+/* The following CRC lookup table was generated automagically */

+/* by the Rocksoft^tm Model CRC Algorithm Table Generation */

+/* Program V1.0 using the following model parameters: */

+/* */

+/* Width : 4 bytes. */

+/* Poly : 0x1EDC6F41L */

+/* Reverse : TRUE. */

+/* */

+/* For more information on the Rocksoft^tm Model CRC Algorithm, */

+/* see the document titled "A Painless Guide to CRC Error */

+/* Detection Algorithms" by Ross Williams */

+/* (ross@guest.adelaide.edu.au.). This document is likely to be */

+/* in the FTP archive "ftp.adelaide.edu.au/pub/rocksoft". */

+/* */

+static const u32int crc32c_tab[256] = {

+    0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 0xC79A971FL,

+    0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, 0x8AD958CFL, 0x78B2DBCCL,

+    0x6BE22838L, 0x9989AB3BL, 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L,

+    0x5E133C24L, 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,

+    0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, 0x9A879FA0L,

+    0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, 0x5D1D08BFL, 0xAF768BBCL,

+    0xBC267848L, 0x4E4DFB4BL, 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L,

+    0x33ED7D2AL, 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,

+    0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, 0x6DFE410EL,

+    0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, 0x30E349B1L, 0xC288CAB2L,

+    0xD1D83946L, 0x23B3BA45L, 0xF779DEAEL, 0x05125DADL, 0x1642AE59L,

+    0xE4292D5AL, 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,

+    0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, 0x417B1DBCL,

+    0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, 0x86E18AA3L, 0x748A09A0L,

+    0x67DAFA54L, 0x95B17957L, 0xCBA24573L, 0x39C9C670L, 0x2A993584L,

+    0xD8F2B687L, 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,

+    0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, 0x96BF4DCCL,

+    0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, 0xDBFC821CL, 0x2997011FL,

+    0x3AC7F2EBL, 0xC8AC71E8L, 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L,

+    0x0F36E6F7L, 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,

+    0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, 0xEB1FCBADL,

+    0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, 0x2C855CB2L, 0xDEEEDFB1L,

+    0xCDBE2C45L, 0x3FD5AF46L, 0x7198540DL, 0x83F3D70EL, 0x90A324FAL,

+    0x62C8A7F9L, 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,

+    0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, 0x3CDB9BDDL,

+    0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, 0x82F63B78L, 0x709DB87BL,

+    0x63CD4B8FL, 0x91A6C88CL, 0x456CAC67L, 0xB7072F64L, 0xA457DC90L,

+    0x563C5F93L, 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,

+    0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, 0x92A8FC17L,

+    0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, 0x55326B08L, 0xA759E80BL,

+    0xB4091BFFL, 0x466298FCL, 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL,

+    0x0B21572CL, 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,

+    0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, 0x65D122B9L,

+    0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, 0x2892ED69L, 0xDAF96E6AL,

+    0xC9A99D9EL, 0x3BC21E9DL, 0xEF087A76L, 0x1D63F975L, 0x0E330A81L,

+    0xFC588982L, 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,

+    0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, 0x38CC2A06L,

+    0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, 0xFF56BD19L, 0x0D3D3E1AL,

+    0x1E6DCDEEL, 0xEC064EEDL, 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L,

+    0xD0DDD530L, 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,

+    0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, 0x8ECEE914L,

+    0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, 0xD3D3E1ABL, 0x21B862A8L,

+    0x32E8915CL, 0xC083125FL, 0x144976B4L, 0xE622F5B7L, 0xF5720643L,

+    0x07198540L, 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,

+    0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, 0xE330A81AL,

+    0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, 0x24AA3F05L, 0xD6C1BC06L,

+    0xC5914FF2L, 0x37FACCF1L, 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L,

+    0x7AB90321L, 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,

+    0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, 0x34F4F86AL,

+    0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, 0x79B737BAL, 0x8BDCB4B9L,

+    0x988C474DL, 0x6AE7C44EL, 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L,

+    0xAD7D5351L};

+/*
+ * Generic byte-at-a-time, table-driven CRC-32 step.
+ *   crc  - running CRC value to continue from
+ *   buf  - input bytes
+ *   size - number of input bytes
+ *   tab  - lookup table, indexed by a byte value
+ * Returns the updated CRC; no initial or final inversion is applied.
+ */
+static inline u32int crc32(u32int crc, const void *buf, u32int size,
+			     const u32int *tab)
+{
+	const u8int *bytes = buf;
+	u32int k;
+
+	for (k = 0; k < size; k++)
+		crc = (crc >> 8) ^ tab[(crc ^ bytes[k]) & 0xFF];
+
+	return crc;
+}

+/* Continue the CRC value crc over size bytes of buf using crc32_tab. */
+u32int ext4_crc32(u32int crc, const void *buf, u32int size)
+{
+	u32int sum;
+
+	sum = crc32(crc, buf, size, crc32_tab);
+	return sum;
+}

+/* Continue the CRC value crc over size bytes of buf using crc32c_tab. */
+u32int ext4_crc32c(u32int crc, const void *buf, u32int size)
+{
+	u32int sum;
+
+	sum = crc32(crc, buf, size, crc32c_tab);
+	return sum;
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_debug.c

@@ -1,0 +1,21 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+static u32int debug_mask;

+/* Turn on the debug mask bits given in m; other bits are left as they are. */
+void ext4_dmask_set(u32int m)
+{
+	debug_mask = debug_mask | m;
+}

+/* Turn off the debug mask bits given in m; other bits are left as they are. */
+void ext4_dmask_clr(u32int m)
+{
+	debug_mask = debug_mask & ~m;
+}

+/* Return the current debug mask. */
+u32int ext4_dmask_get(void)
+{
+	u32int current = debug_mask;
+
+	return current;
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_dir.c

@@ -1,0 +1,649 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_trans.h"

+#include "ext4_dir.h"

+#include "ext4_dir_idx.h"

+#include "ext4_crc32.h"

+#include "ext4_inode.h"

+#include "ext4_fs.h"

+/*
+ * Locate the checksum pseudo-entry at the end of the directory block that
+ * starts at `de`.  Returns nil when the tail slot does not look like a
+ * checksum tail: a non-zero reserved field, a rec_len that is not the size
+ * of the tail structure, or a reserved_ft other than EXT4_DIRENTRY_DIR_CSUM.
+ */
+static struct ext4_dir_entry_tail *
+ext4_dir_get_tail(struct ext4_inode_ref *inode_ref,
+		struct ext4_dir_en *de)
+{
+	u32int bsize = ext4_sb_get_block_size(&inode_ref->fs->sb);
+	struct ext4_dir_entry_tail *tail = EXT4_DIRENT_TAIL(de, bsize);
+	bool looks_valid;
+
+	looks_valid = !tail->reserved_zero1 && !tail->reserved_zero2 &&
+		to_le16(tail->rec_len) == sizeof(struct ext4_dir_entry_tail) &&
+		tail->reserved_ft == EXT4_DIRENTRY_DIR_CSUM;
+	return looks_valid ? tail : nil;
+}

+/*
+ * Compute the crc32c of a directory block.  The checksum is chained over
+ * the filesystem UUID, the inode number and inode generation (both passed
+ * through to_le32), and finally `size` bytes starting at `dirent`.
+ */
+static u32int ext4_dir_csum(struct ext4_inode_ref *inode_ref,
+			      struct ext4_dir_en *dirent, int size)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int le_ino = to_le32(inode_ref->index);
+	u32int le_gen = to_le32(ext4_inode_get_generation(inode_ref->inode));
+	u32int sum;
+
+	sum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	sum = ext4_crc32c(sum, &le_ino, sizeof(le_ino));
+	sum = ext4_crc32c(sum, &le_gen, sizeof(le_gen));
+	return ext4_crc32c(sum, dirent, size);
+}

+/*
+ * Check the checksum stored in the tail of the directory block at `dirent`.
+ * Returns true when the metadata-checksum feature is off or the stored
+ * checksum matches; false when the block has no checksum tail or the
+ * checksum differs.
+ */
+bool ext4_dir_csum_verify(struct ext4_inode_ref *inode_ref,
+			      struct ext4_dir_en *dirent)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	struct ext4_dir_entry_tail *tail;
+	intptr covered;
+
+	/* Nothing to verify if the filesystem does not use checksums */
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+
+	tail = ext4_dir_get_tail(inode_ref, dirent);
+	if (tail == nil)
+		return false;	/* no room for a checksum in this block */
+
+	/* The checksum covers everything in front of the tail */
+	covered = (char *)tail - (char *)dirent;
+	return tail->checksum == to_le32(ext4_dir_csum(inode_ref, dirent, covered));
+}

+/*
+ * Reset *t to an empty checksum tail: every field zero except rec_len
+ * (the size of the tail itself, stored through to_le16) and the
+ * EXT4_DIRENTRY_DIR_CSUM marker in reserved_ft.
+ */
+void ext4_dir_init_entry_tail(struct ext4_dir_entry_tail *t)
+{
+	memset(t, 0, sizeof *t);
+	t->reserved_ft = EXT4_DIRENTRY_DIR_CSUM;
+	t->rec_len = to_le16(sizeof *t);
+}

+/*
+ * Recompute and store the checksum of the directory block at `dirent`.
+ * Does nothing when the metadata-checksum feature is off or the block has
+ * no checksum tail.
+ */
+void ext4_dir_set_csum(struct ext4_inode_ref *inode_ref,
+			   struct ext4_dir_en *dirent)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	struct ext4_dir_entry_tail *tail;
+	intptr covered;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return;
+
+	tail = ext4_dir_get_tail(inode_ref, dirent);
+	if (tail == nil)
+		return;	/* no room for a checksum in this block */
+
+	/* The checksum covers everything in front of the tail */
+	covered = (char *)tail - (char *)dirent;
+	tail->checksum = to_le32(ext4_dir_csum(inode_ref, dirent, covered));
+}

+/**@brief Validate the entry at it->curr_off and publish it as it->curr.
+ *        it->curr is cleared first and is only set once every check has
+ *        passed: the offset inside the block is 4-byte aligned, at least
+ *        8 bytes remain in the block, the entry length stays inside the
+ *        block, and the name length fits in the entry after its first
+ *        8 bytes.
+ * @param it Iterator to be checked
+ * @param block_size Size of data block
+ * @return 0 on success; -1 with errstr "i/o error" when a check fails
+ */
+static int ext4_dir_iterator_set(struct ext4_dir_iter *it,
+				 u32int block_size)
+{
+	u32int off_in_block = it->curr_off % block_size;
+	struct ext4_sblock *sb = &it->inode_ref->fs->sb;
+	it->curr = nil;
+	/* Ensure proper alignment */
+	if ((off_in_block % 4) != 0)
+		goto Ioerr;
+	/* Ensure that the core of the entry does not overflow the block */
+	if (off_in_block > block_size - 8)
+		goto Ioerr;
+	struct ext4_dir_en *en;
+	en = (void *)(it->curr_blk.data + off_in_block);
+	/* Ensure that the whole entry does not overflow the block */
+	u16int length = ext4_dir_en_get_entry_len(en);
+	if (off_in_block + length > block_size)
+		goto Ioerr;
+	/* Ensure the name length is not too large */
+	if (ext4_dir_en_get_name_len(sb, en) > length - 8)
+		goto Ioerr;
+	/* Everything OK - "publish" the entry */
+	it->curr = en;
+	return 0;
+Ioerr:
+	werrstr("i/o error");
+	return -1;
+}

+/**@brief Move the iterator to byte offset pos inside the directory.
+ *        it->curr is cleared first.  If pos is at or past the directory
+ *        size, the held block (if any) is released, it->curr stays nil and
+ *        the call succeeds.  Otherwise the block containing pos is loaded
+ *        when no block is held or pos lies in a different block than
+ *        it->curr_off, and the entry at pos is validated with
+ *        ext4_dir_iterator_set().
+ * @param it  Initialized iterator
+ * @param pos Position of the next entry
+ * @return 0 on success, otherwise the error of the failing helper
+ */
+static int ext4_dir_iterator_seek(struct ext4_dir_iter *it, u64int pos)
+{
+	struct ext4_sblock *sb = &it->inode_ref->fs->sb;
+	struct ext4_inode *inode = it->inode_ref->inode;
+	struct ext4_blockdev *bdev = it->inode_ref->fs->bdev;
+	u64int size = ext4_inode_get_size(sb, inode);
+	int r;
+	/* The iterator is not valid until we seek to the desired position */
+	it->curr = nil;
+	/* Are we at the end? */
+	if (pos >= size) {
+		if (it->curr_blk.lb_id) {
+			r = ext4_block_set(bdev, &it->curr_blk);
+			it->curr_blk.lb_id = 0;	/* block is no longer held, even if the put failed */
+			if (r != 0)
+				return r;
+		}
+		it->curr_off = pos;
+		return 0;
+	}
+	/* Compute next block address */
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u64int current_blk_idx = it->curr_off / block_size;
+	u32int next_blk_idx = (u32int)(pos / block_size);
+	/*
+	 * If we don't have a block or are moving across block boundary,
+	 * we need to get another block
+	 */
+	if ((it->curr_blk.lb_id == 0) ||
+	    (current_blk_idx != next_blk_idx)) {
+		if (it->curr_blk.lb_id) {
+			r = ext4_block_set(bdev, &it->curr_blk);
+			it->curr_blk.lb_id = 0;
+			if (r != 0)
+				return r;
+		}
+		ext4_fsblk_t next_blk;
+		r = ext4_fs_get_inode_dblk_idx(it->inode_ref, next_blk_idx,
+					       &next_blk, false);
+		if (r != 0)
+			return r;
+		r = ext4_trans_block_get(bdev, &it->curr_blk, next_blk);
+		if (r != 0) {
+			it->curr_blk.lb_id = 0;	/* mark "no block held" so fini does not put it */
+			return r;
+		}
+	}
+	it->curr_off = pos;
+	return ext4_dir_iterator_set(it, block_size);
+}

+/*
+ * Prepare `it` for walking the directory `inode_ref` and position it at
+ * byte offset `pos`.  Returns the result of ext4_dir_iterator_seek().
+ */
+int ext4_dir_iterator_init(struct ext4_dir_iter *it,
+			   struct ext4_inode_ref *inode_ref, u64int pos)
+{
+	/* Start with no block held and no current entry */
+	it->curr_blk.lb_id = 0;
+	it->curr_off = 0;
+	it->curr = nil;
+	it->inode_ref = inode_ref;
+
+	return ext4_dir_iterator_seek(it, pos);
+}

+/*
+ * Advance the iterator past the current entry to the next entry whose
+ * inode number is non-zero.  Stops with it->curr == nil when the seek
+ * leaves no current entry (end of directory or seek failure).
+ * it->curr must be non-nil on entry: it is dereferenced unconditionally.
+ * Returns the result of the last ext4_dir_iterator_seek().
+ */
+int ext4_dir_iterator_next(struct ext4_dir_iter *it)
+{
+	int rc;
+
+	for (;;) {
+		u16int step = ext4_dir_en_get_entry_len(it->curr);
+
+		rc = ext4_dir_iterator_seek(it, it->curr_off + step);
+		if (rc != 0 || it->curr == nil)
+			break;
+		/* Entries with inode number 0 are unused: keep going */
+		if (ext4_dir_en_get_inode(it->curr) != 0)
+			break;
+	}
+	return rc;
+}

+/*
+ * Finish iterating: drop the current entry pointer and release the block
+ * the iterator still holds, if any.  Returns the result of
+ * ext4_block_set(), or 0 when no block was held.
+ */
+int ext4_dir_iterator_fini(struct ext4_dir_iter *it)
+{
+	it->curr = nil;
+	if (it->curr_blk.lb_id == 0)
+		return 0;
+	return ext4_block_set(it->inode_ref->fs->bdev, &it->curr_blk);
+}

+/*
+ * Fill in the directory entry `en` so that it refers to `child` under the
+ * name `name` (name_len bytes, not NUL-terminated in the entry) and
+ * occupies entry_len bytes.  The entry's file type is derived from the
+ * child's inode mode; unrecognised modes are stored as EXT4_DE_UNKNOWN.
+ * entry_len must not exceed the block size (asserted).
+ */
+void ext4_dir_write_entry(struct ext4_sblock *sb, struct ext4_dir_en *en,
+			  u16int entry_len, struct ext4_inode_ref *child,
+			  const char *name, usize name_len)
+{
+	u8int ftype;
+
+	/* Check maximum entry length */
+	assert(entry_len <= ext4_sb_get_block_size(sb));
+
+	/* Map the inode mode to a directory entry type */
+	switch (ext4_inode_type(sb, child->inode)) {
+	case EXT4_INODE_MODE_DIRECTORY:
+		ftype = EXT4_DE_DIR;
+		break;
+	case EXT4_INODE_MODE_FILE:
+		ftype = EXT4_DE_REG_FILE;
+		break;
+	case EXT4_INODE_MODE_SOFTLINK:
+		ftype = EXT4_DE_SYMLINK;
+		break;
+	case EXT4_INODE_MODE_CHARDEV:
+		ftype = EXT4_DE_CHRDEV;
+		break;
+	case EXT4_INODE_MODE_BLOCKDEV:
+		ftype = EXT4_DE_BLKDEV;
+		break;
+	case EXT4_INODE_MODE_FIFO:
+		ftype = EXT4_DE_FIFO;
+		break;
+	case EXT4_INODE_MODE_SOCKET:
+		ftype = EXT4_DE_SOCK;
+		break;
+	default:
+		/* FIXME: unsupported filetype */
+		ftype = EXT4_DE_UNKNOWN;
+		break;
+	}
+	ext4_dir_en_set_inode_type(sb, en, ftype);
+
+	/* Header fields, then the name bytes */
+	ext4_dir_en_set_inode(en, child->index);
+	ext4_dir_en_set_entry_len(en, entry_len);
+	ext4_dir_en_set_name_len(sb, en, (u16int)name_len);
+	memcpy(en->name, name, name_len);
+}

+int ext4_dir_add_entry(struct ext4_inode_ref *parent, const char *name,
+		       u32int name_len, struct ext4_inode_ref *child)
+{	/*
+	 * Add an entry `name` (name_len bytes) referring to `child` to the
+	 * directory `parent`.
+	 *
+	 * If the filesystem has the dir-index feature and the directory has
+	 * the index flag, the indexed insert is tried first.  A result of
+	 * EXT4_ERR_BAD_DX_DIR clears the index flag on the inode; any other
+	 * non-zero result also falls through to the linear path below.
+	 *
+	 * The linear path offers the entry to every existing data block in
+	 * turn and, if none accepts it, appends a fresh block holding only
+	 * the new entry.  Returns 0 on success or the error of the failing
+	 * helper.
+	 */
+	int r;
+	struct ext4_fs *fs = parent->fs;
+	struct ext4_sblock *sb = &parent->fs->sb;
+	/* Index adding (if allowed) */
+	if ((ext4_sb_feature_com(sb, EXT4_FCOM_DIR_INDEX)) &&
+	    (ext4_inode_has_flag(parent->inode, EXT4_INODE_FLAG_INDEX))) {
+		r = ext4_dir_dx_add_entry(parent, child, name, name_len);
+		/* Check if index is corrupted */
+		if (r == EXT4_ERR_BAD_DX_DIR) {
+			/* Needed to clear dir index flag if corrupted */
+			ext4_inode_clear_flag(parent->inode, EXT4_INODE_FLAG_INDEX);
+			parent->dirty = true;
+		} else if (r == 0) {
+			return 0;
+		}
+	}
+	/* Linear algorithm */
+	u32int iblock = 0;
+	ext4_fsblk_t fblock = 0;
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u64int inode_size = ext4_inode_get_size(sb, parent->inode);
+	u32int total_blocks = (u32int)(inode_size / block_size);
+	/* Find block, where is space for new entry and try to add */
+	bool success = false;
+	for (iblock = 0; iblock < total_blocks; ++iblock) {
+		r = ext4_fs_get_inode_dblk_idx(parent, iblock, &fblock, false);
+		if (r != 0)
+			return r;
+		struct ext4_block block;
+		r = ext4_trans_block_get(fs->bdev, &block, fblock);
+		if (r != 0)
+			return r;
+		/* A checksum mismatch is only reported; the block is still used */
+		if (!ext4_dir_csum_verify(parent, (void *)block.data)) {
+			ext4_dbg(DEBUG_DIR,
+				 DBG_WARN "Leaf block checksum failed."
+				 "Inode: %ud, "
+				 "Block: %ud\n",
+				 parent->index,
+				 iblock);
+		}
+		/* If adding is successful, function can finish */
+		r = ext4_dir_try_insert_entry(sb, parent, &block, child,
+						name, name_len);
+		if (r == 0)
+			success = true;
+		/* Release the block before acting on the insert result */
+		r = ext4_block_set(fs->bdev, &block);
+		if (r != 0)
+			return r;
+		if (success)
+			return 0;
+	}
+	/* No free block found - needed to allocate next data block */
+	iblock = 0;
+	fblock = 0;
+	r = ext4_fs_append_inode_dblk(parent, &fblock, &iblock);
+	if (r != 0)
+		return r;
+	/* Load new block */
+	struct ext4_block b;
+	r = ext4_trans_block_get_noread(fs->bdev, &b, fblock);
+	if (r != 0)
+		return r;
+	/* Fill block with zeroes */
+	memset(b.data, 0, block_size);
+	struct ext4_dir_en *blk_en = (void *)b.data;
+	/* Save new block */
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		/* Leave room at the end of the block for the checksum tail */
+		u16int el = block_size - sizeof(struct ext4_dir_entry_tail);
+		ext4_dir_write_entry(sb, blk_en, el, child, name, name_len);
+		ext4_dir_init_entry_tail(EXT4_DIRENT_TAIL(b.data, block_size));
+	} else {
+		ext4_dir_write_entry(sb, blk_en, block_size, child, name,
+				name_len);
+	}
+	ext4_dir_set_csum(parent, (void *)b.data);
+	ext4_trans_set_block_dirty(b.buf);
+	r = ext4_block_set(fs->bdev, &b);
+	return r;
+}

+int ext4_dir_find_entry(struct ext4_dir_search_result *result,
+			struct ext4_inode_ref *parent, const char *name,
+			u32int name_len)
+{	/*
+	 * Look up `name` (name_len bytes) in the directory `parent`.
+	 *
+	 * On success returns 0 with result->dentry pointing into
+	 * result->block, which is still held; it is released with
+	 * ext4_dir_destroy_result().  On failure result->block.lb_id is 0
+	 * as far as the linear path is concerned.
+	 *
+	 * The indexed lookup is tried first when the filesystem and the
+	 * directory both have the index enabled.  EXT4_ERR_BAD_DX_DIR clears
+	 * the index flag on the inode; any other non-zero result also falls
+	 * through to the linear scan.  Returns EXT4_ERR_NOT_FOUND when no
+	 * block contains the name, or the error of a failing block helper.
+	 */
+	int r;
+	struct ext4_sblock *sb = &parent->fs->sb;
+	/* Entry clear */
+	result->block.lb_id = 0;
+	result->dentry = nil;
+	/* Index search */
+	if ((ext4_sb_feature_com(sb, EXT4_FCOM_DIR_INDEX)) &&
+	    (ext4_inode_has_flag(parent->inode, EXT4_INODE_FLAG_INDEX))) {
+		r = ext4_dir_dx_find_entry(result, parent, name_len, name);
+		/* Check if index is corrupted */
+		if (r == EXT4_ERR_BAD_DX_DIR) {
+			/* Needed to clear dir index flag if corrupted */
+			ext4_inode_clear_flag(parent->inode, EXT4_INODE_FLAG_INDEX);
+			parent->dirty = true;
+		} else if (r == 0) {
+			return 0;
+		}
+	}
+	/* Linear algorithm */
+	u32int iblock;
+	ext4_fsblk_t fblock;
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u64int inode_size = ext4_inode_get_size(sb, parent->inode);
+	u32int total_blocks = (u32int)(inode_size / block_size);
+	/* Walk through all data blocks */
+	for (iblock = 0; iblock < total_blocks; ++iblock) {
+		/* Load block address */
+		r = ext4_fs_get_inode_dblk_idx(parent, iblock, &fblock, false);
+		if (r != 0)
+			return r;
+		/* Load data block */
+		struct ext4_block b;
+		r = ext4_trans_block_get(parent->fs->bdev, &b, fblock);
+		if (r != 0){
+			werrstr("ext4_trans_block_get: %r");
+			return r;
+		}
+		/* A checksum mismatch is only reported; the block is still searched */
+		if (!ext4_dir_csum_verify(parent, (void *)b.data)) {
+			ext4_dbg(DEBUG_DIR,
+				 DBG_WARN "Leaf block checksum failed."
+				 "Inode: %ud, "
+				 "Block: %ud\n",
+				 parent->index,
+				 iblock);
+		}
+		/* Try to find entry in block */
+		struct ext4_dir_en *res_entry;
+		r = ext4_dir_find_in_block(&b, sb, name_len, name, &res_entry);
+		if (r == 0) {
+			/* Hand the held block over to the caller via result */
+			result->block = b;
+			result->dentry = res_entry;
+			return 0;
+		}
+		/* Entry not found - put block and continue to the next block */
+		/* (any non-zero result, including "corrupt entry", ends up here) */
+		r = ext4_block_set(parent->fs->bdev, &b);
+		if (r != 0)
+			return r;
+	}
+	return EXT4_ERR_NOT_FOUND;
+}

+/*
+ * Remove the entry `name` (name_len bytes) from the directory `parent`.
+ *
+ * The entry's inode number is set to 0 and, unless it is the first entry
+ * in its block, its space is merged into the preceding entry.  The block
+ * checksum is refreshed and the block is marked dirty.
+ *
+ * The chain of entries leading to the victim is validated before anything
+ * is modified: a zero-length record or a chain that does not land exactly
+ * on the victim fails with errstr "corrupt entry" and leaves the block
+ * untouched, instead of looping forever or aborting on bad on-disk data.
+ *
+ * Returns 0 on success, -1 with errstr "not a directory" if `parent` is
+ * not a directory, the lookup error if the name is not found, or the
+ * result of releasing the block.
+ */
+int ext4_dir_remove_entry(struct ext4_inode_ref *parent, const char *name,
+			  u32int name_len)
+{
+	struct ext4_sblock *sb = &parent->fs->sb;
+	/* Check if removing from directory */
+	if (!ext4_inode_is_type(sb, parent->inode, EXT4_INODE_MODE_DIRECTORY)) {
+		werrstr("not a directory");
+		return -1;
+	}
+	/* Try to find entry */
+	struct ext4_dir_search_result result;
+	int rc = ext4_dir_find_entry(&result, parent, name, name_len);
+	if (rc != 0)
+		return rc;
+	/* Store entry position in block */
+	u32int pos = (u8int *)result.dentry - result.block.data;
+	/*
+	 * If entry is not the first in block, it must be merged
+	 * with previous entry: find the direct predecessor first.
+	 */
+	struct ext4_dir_en *prev_de = nil;
+	u16int prev_len = 0;
+	if (pos != 0) {
+		u32int offset = 0;
+		/* Start from the first entry in block */
+		prev_de = (void *)result.block.data;
+		prev_len = ext4_dir_en_get_entry_len(prev_de);
+		while (prev_len != 0 && (offset + prev_len) < pos) {
+			offset += prev_len;
+			prev_de = (void *)(result.block.data + offset);
+			prev_len = ext4_dir_en_get_entry_len(prev_de);
+		}
+		if (prev_len == 0 || offset + prev_len != pos) {
+			/* Release first so our errstr is the one reported */
+			ext4_dir_destroy_result(parent, &result);
+			werrstr("corrupt entry");
+			return -1;
+		}
+	}
+	/* Invalidate entry */
+	ext4_dir_en_set_inode(result.dentry, 0);
+	if (prev_de != nil) {
+		/* Add removed entry length to predecessor's length */
+		u16int del_len;
+		del_len = ext4_dir_en_get_entry_len(result.dentry);
+		ext4_dir_en_set_entry_len(prev_de, prev_len + del_len);
+	}
+	ext4_dir_set_csum(parent,
+			(struct ext4_dir_en *)result.block.data);
+	ext4_trans_set_block_dirty(result.block.buf);
+	return ext4_dir_destroy_result(parent, &result);
+}

+/*
+ * Try to place a new entry for `child` named `name` (name_len bytes) into
+ * the directory block `dst_blk`, either by reusing an unused entry that is
+ * large enough or by splitting the slack off the end of a live entry.
+ * On success the block checksum is refreshed and the block marked dirty.
+ *
+ * Every record header is validated before it is trusted, so a damaged
+ * block cannot cause an endless loop or a write outside the record.
+ *
+ * Returns 0 on success, -1 with errstr "no space" if nothing fits, or
+ * -1 with errstr "corrupt entry" if a record is shorter than its header,
+ * runs past the end of the block, or is too short for its own name.
+ */
+int ext4_dir_try_insert_entry(struct ext4_sblock *sb,
+			      struct ext4_inode_ref *inode_ref,
+			      struct ext4_block *dst_blk,
+			      struct ext4_inode_ref *child, const char *name,
+			      u32int name_len)
+{
+	usize hdr_len = sizeof(struct ext4_fake_dir_entry);
+	u32int block_size = ext4_sb_get_block_size(sb);
+	/*
+	 * A name that cannot fit in a block can never be inserted; rejecting
+	 * it here also keeps the length arithmetic below from wrapping.
+	 */
+	if (name_len > block_size - hdr_len) {
+		werrstr("no space");
+		return -1;
+	}
+	/* Compute required length entry and align it to 4 bytes */
+	u32int required_len = hdr_len + name_len;
+	if ((required_len % 4) != 0)
+		required_len += 4 - (required_len % 4);
+	/* Initialize pointers, stop means to upper bound */
+	u8int *cur = dst_blk->data;
+	u8int *stop = dst_blk->data + block_size;
+	/*
+	 * Walk through the block and check for invalid entries
+	 * or entries with free space for new entry
+	 */
+	while (cur < stop) {
+		struct ext4_dir_en *start = (void *)cur;
+		usize room = stop - cur;
+		/* The fixed header must lie inside the block before we read it */
+		if (room < hdr_len)
+			goto Corrupt;
+		u32int inode = ext4_dir_en_get_inode(start);
+		u16int rec_len = ext4_dir_en_get_entry_len(start);
+		u8int itype = ext4_dir_en_get_inode_type(sb, start);
+		/* A record must cover its header and stay inside the block */
+		if (rec_len < hdr_len || rec_len > room)
+			goto Corrupt;
+		/* If invalid and large enough entry, use it */
+		if ((inode == 0) && (itype != EXT4_DIRENTRY_DIR_CSUM) &&
+		    (rec_len >= required_len)) {
+			ext4_dir_write_entry(sb, start, rec_len, child, name,
+					     name_len);
+			ext4_dir_set_csum(inode_ref, (void *)dst_blk->data);
+			ext4_trans_set_block_dirty(dst_blk->buf);
+			return 0;
+		}
+		/* Valid entry, try to split it */
+		if (inode != 0) {
+			u16int used_len;
+			used_len = ext4_dir_en_get_name_len(sb, start);
+			u16int sz;
+			sz = sizeof(struct ext4_fake_dir_entry) + used_len;
+			if ((used_len % 4) != 0)
+				sz += 4 - (used_len % 4);
+			/* The name must fit in the record, else rec_len - sz wraps */
+			if (sz > rec_len)
+				goto Corrupt;
+			u16int free_space = rec_len - sz;
+			/* There is free space for new entry */
+			if (free_space >= required_len) {
+				/* Cut tail of current entry */
+				struct ext4_dir_en * new_entry;
+				new_entry = (void *)((u8int *)start + sz);
+				ext4_dir_en_set_entry_len(start, sz);
+				ext4_dir_write_entry(sb, new_entry, free_space,
+						     child, name, name_len);
+				ext4_dir_set_csum(inode_ref,
+						  (void *)dst_blk->data);
+				ext4_trans_set_block_dirty(dst_blk->buf);
+				return 0;
+			}
+		}
+		/* Jump to the next entry */
+		cur += rec_len;
+	}
+	/* No free space found for new entry */
+	werrstr("no space");
+	return -1;
+Corrupt:
+	werrstr("corrupt entry");
+	return -1;
+}

+/*
+ * Search one directory block for an entry named `name` (name_len bytes).
+ *
+ * On a match stores the entry in *res_entry and returns 0.  Returns
+ * EXT4_ERR_NOT_FOUND when the block holds no such entry, or -1 with errstr
+ * "corrupt entry" when a record with length 0 is met.
+ *
+ * The walk stops as soon as fewer than header + name_len bytes remain in
+ * the block: no entry that starts there could hold the name, and stopping
+ * keeps both the header reads and the name comparison inside the block.
+ */
+int ext4_dir_find_in_block(struct ext4_block *block, struct ext4_sblock *sb,
+			   usize name_len, const char *name,
+			   struct ext4_dir_en **res_entry)
+{
+	usize hdr_len = sizeof(struct ext4_fake_dir_entry);
+	/* Start from the first entry in block */
+	struct ext4_dir_en *de = (struct ext4_dir_en *)block->data;
+	/* Set upper bound for cycling */
+	u8int *addr_limit = block->data + ext4_sb_get_block_size(sb);
+	/* Walk through the block and check entries */
+	while ((u8int *)de < addr_limit) {
+		/* Termination condition: header plus name must fit */
+		usize room = addr_limit - (u8int *)de;
+		if (room < hdr_len || room - hdr_len < name_len)
+			break;
+		/* Valid entry - check it */
+		if (ext4_dir_en_get_inode(de) != 0) {
+			/* For more efficient compare only lengths firstly*/
+			usize el = ext4_dir_en_get_name_len(sb, de);
+			if (el == name_len) {
+				/* Compare names */
+				if (memcmp(name, de->name, name_len) == 0) {
+					*res_entry = de;
+					return 0;
+				}
+			}
+		}
+		u16int de_len = ext4_dir_en_get_entry_len(de);
+		if (de_len == 0) {
+			werrstr("corrupt entry");
+			return -1;
+		}
+		/* Jump to next entry */
+		de = (struct ext4_dir_en *)((u8int *)de + de_len);
+	}
+	/* Entry not found */
+	return EXT4_ERR_NOT_FOUND;
+}

+/*
+ * Release the block held by a search result, if it holds one.
+ * Returns the result of ext4_block_set(), or 0 when there was no block.
+ */
+int ext4_dir_destroy_result(struct ext4_inode_ref *parent,
+			    struct ext4_dir_search_result *result)
+{
+	if (result->block.lb_id == 0)
+		return 0;
+	return ext4_block_set(parent->fs->bdev, &result->block);
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_dir_idx.c

@@ -1,0 +1,1356 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_trans.h"

+#include "ext4_dir_idx.h"

+#include "ext4_dir.h"

+#include "ext4_blockdev.h"

+#include "ext4_fs.h"

+#include "ext4_super.h"

+#include "ext4_inode.h"

+#include "ext4_crc32.h"

+#include "ext4_hash.h"

+/**@brief Get hash version used in directory index.
+ * @param ri Pointer to root info structure of index
+ * @return Hash algorithm version
+ */
+static inline u8int
+ext4_dir_dx_rinfo_get_hash_version(struct ext4_dir_idx_rinfo *ri)
+{
+	return ri->hash_version;
+}

+/**@brief Set hash version that will be used in directory index.
+ * @param ri Pointer to root info structure of index
+ * @param v Hash algorithm version
+ */
+static inline void
+ext4_dir_dx_rinfo_set_hash_version(struct ext4_dir_idx_rinfo *ri, u8int v)
+{
+	ri->hash_version = v;
+}

+/**@brief Get length of root info structure in bytes.
+ * @param ri Pointer to root info structure of index
+ * @return Length of the structure
+ */
+static inline u8int
+ext4_dir_dx_rinfo_get_info_length(struct ext4_dir_idx_rinfo *ri)
+{
+	return ri->info_length;
+}

+/**@brief Set length of root info structure in bytes.
+ * @param ri  Pointer to root info structure of index
+ * @param len Length of the structure
+ */
+static inline void
+ext4_dir_dx_root_info_set_info_length(struct ext4_dir_idx_rinfo *ri,
+				      u8int len)
+{
+	ri->info_length = len;
+}

+/**@brief Get number of indirect levels of HTree.
+ * @param ri Pointer to root info structure of index
+ * @return Height of HTree (actually only 0 or 1)
+ */
+static inline u8int
+ext4_dir_dx_rinfo_get_indirect_levels(struct ext4_dir_idx_rinfo *ri)
+{
+	return ri->indirect_levels;
+}

+/**@brief Set number of indirect levels of HTree.
+ * @param ri Pointer to root info structure of index
+ * @param l  Height of HTree (actually only 0 or 1)
+ */
+static inline void
+ext4_dir_dx_rinfo_set_indirect_levels(struct ext4_dir_idx_rinfo *ri, u8int l)
+{
+	ri->indirect_levels = l;
+}

+/**@brief Get maximum number of index node entries.
+ * @param climit Pointer to count/limit structure
+ * @return Maximum of entries in node (stored field passed through to_le16)
+ */
+static inline u16int
+ext4_dir_dx_climit_get_limit(struct ext4_dir_idx_climit *climit)
+{
+	return to_le16(climit->limit);
+}

+/**@brief Set maximum number of index node entries.
+ * @param climit Pointer to count/limit structure
+ * @param limit Maximum of entries in node (stored through to_le16)
+ */
+static inline void
+ext4_dir_dx_climit_set_limit(struct ext4_dir_idx_climit *climit, u16int limit)
+{
+	climit->limit = to_le16(limit);
+}

+/**@brief Get current number of index node entries.
+ * @param climit Pointer to count/limit structure
+ * @return Number of entries in node (stored field passed through to_le16)
+ */
+static inline u16int
+ext4_dir_dx_climit_get_count(struct ext4_dir_idx_climit *climit)
+{
+	return to_le16(climit->count);
+}

+/**@brief Set current number of index node entries.
+ * @param climit Pointer to count/limit structure
+ * @param count Number of entries in node (stored through to_le16)
+ */
+static inline void
+ext4_dir_dx_climit_set_count(struct ext4_dir_idx_climit *climit, u16int count)
+{
+	climit->count = to_le16(count);
+}

+/**@brief Get hash value of index entry.
+ * @param entry Pointer to index entry
+ * @return Hash value (stored field passed through to_le32)
+ */
+static inline u32int
+ext4_dir_dx_entry_get_hash(struct ext4_dir_idx_entry *entry)
+{
+	return to_le32(entry->hash);
+}

+/**@brief Set hash value of index entry.
+ * @param entry Pointer to index entry
+ * @param hash  Hash value (stored through to_le32)
+ */
+static inline void
+ext4_dir_dx_entry_set_hash(struct ext4_dir_idx_entry *entry, u32int hash)
+{
+	entry->hash = to_le32(hash);
+}

+/**@brief Get block address where child node is located.
+ * @param entry Pointer to index entry
+ * @return Block address of child node (stored field passed through to_le32)
+ */
+static inline u32int
+ext4_dir_dx_entry_get_block(struct ext4_dir_idx_entry *entry)
+{
+	return to_le32(entry->block);
+}

+/**@brief Set block address where child node is located.
+ * @param entry Pointer to index entry
+ * @param block Block address of child node (stored through to_le32)
+ */
+static inline void
+ext4_dir_dx_entry_set_block(struct ext4_dir_idx_entry *entry, u32int block)
+{
+	entry->block = to_le32(block);
+}

+/**@brief Sort entry item.
+ *        NOTE(review): the code that fills and sorts these items is outside
+ *        this part of the file; presumably one item describes one directory
+ *        entry (its hash, record length and location) - confirm there.
+ */
+struct ext4_dx_sort_entry {
+	u32int hash;
+	u32int rec_len;
+	void *dentry;
+};

+static int ext4_dir_dx_hash_string(struct ext4_hash_info *hinfo, int len,
+				   const char *name)
+{	/*
+	 * Hash `len` bytes of `name` with ext2_htree_hash(), using the seed
+	 * and hash version already stored in hinfo; the results are written
+	 * to hinfo->hash and hinfo->minor_hash.  Returns whatever
+	 * ext2_htree_hash() returns.
+	 */
+	return ext2_htree_hash(name, len, hinfo->seed, hinfo->hash_version,
+			       &hinfo->hash, &hinfo->minor_hash);
+}

+/*
+ * Compute the crc32c checksum of an HTree index node.
+ *
+ * `de` is the start of the node's block, `count_offset` the byte offset of
+ * its count/limit header, `count` the number of index entries in use and
+ * `t` the node's checksum tail.  The checksum is chained over the fs UUID,
+ * the inode number and generation, the first
+ * count_offset + count * sizeof(index entry) bytes of the block, and the
+ * tail with its checksum field treated as zero (the stored value is put
+ * back before returning).
+ *
+ * Returns 0 when the metadata-checksum feature is off.
+ */
+static u32int ext4_dir_dx_checksum(struct ext4_inode_ref *inode_ref, void *de,
+				     int count_offset, int count,
+				     struct ext4_dir_idx_tail *t)
+{
+	u32int saved_csum, csum = 0;
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	int sz;
+	/* Compute the checksum only if the filesystem supports it */
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		u32int ino_index = to_le32(inode_ref->index);
+		u32int ino_gen;
+		ino_gen = to_le32(ext4_inode_get_generation(inode_ref->inode));
+		/* The covered span is made of index entries, not tails */
+		sz = count_offset + (count * sizeof(struct ext4_dir_idx_entry));
+		saved_csum = t->checksum;
+		t->checksum = 0;
+		/* First calculate crc32 checksum against fs uuid */
+		csum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+		/* Then calculate crc32 checksum against inode number
+		 * and inode generation */
+		csum = ext4_crc32c(csum, &ino_index, sizeof(ino_index));
+		csum = ext4_crc32c(csum, &ino_gen, sizeof(ino_gen));
+		/* After that calculate crc32 checksum against all the dx_entry */
+		csum = ext4_crc32c(csum, de, sz);
+		/* Finally calculate crc32 checksum for dx_tail */
+		csum = ext4_crc32c(csum, t, sizeof(struct ext4_dir_idx_tail));
+		t->checksum = saved_csum;
+	}
+	return csum;
+}

+static struct ext4_dir_idx_climit *
+ext4_dir_dx_get_climit(struct ext4_inode_ref *inode_ref,
+			   struct ext4_dir_en *dirent, int *offset)
+{	/*
+	 * Locate the count/limit header of the index node whose block starts
+	 * at `dirent`, based on the length of the block's first entry:
+	 *  - 12: the block is treated as an index root; the second dot entry
+	 *    must span the rest of the block, reserved_zero must be 0 and
+	 *    info_length must equal sizeof(struct ext4_dir_idx_rinfo).
+	 *    The header is at byte offset 32.
+	 *  - block size: the header is at byte offset 8.
+	 *  - anything else: not an index node, nil is returned.
+	 * If `offset` is non-nil the byte offset is also stored there.
+	 */
+	struct ext4_dir_en *dp;
+	struct ext4_dir_idx_root *root;
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u16int entry_len = ext4_dir_en_get_entry_len(dirent);
+	int count_offset;
+	if (entry_len == 12) {
+		root = (struct ext4_dir_idx_root *)dirent;
+		dp = (struct ext4_dir_en *)&root->dots[1];
+		if (ext4_dir_en_get_entry_len(dp) != (block_size - 12))
+			return nil;
+		if (root->info.reserved_zero)
+			return nil;
+		if (root->info.info_length != sizeof(struct ext4_dir_idx_rinfo))
+			return nil;
+		count_offset = 32;
+	} else if (entry_len == block_size) {
+		count_offset = 8;
+	} else {
+		return nil;
+	}
+	if (offset)
+		*offset = count_offset;
+	return (struct ext4_dir_idx_climit *)(((char *)dirent) + count_offset);
+}

+/*
+ * BIG FAT NOTES:
+ *       Currently we do not verify the checksum of HTree node.
+ *       NOTE(review): the callers are outside this part of the file, so
+ *       whether the result is acted upon cannot be confirmed here.
+ *
+ * Compare the checksum stored in the tail of the index node at `de` with
+ * the one computed by ext4_dir_dx_checksum().  Returns false only on a
+ * mismatch.  Returns true when the metadata-checksum feature is off, and
+ * also when the node cannot be checked: no count/limit header is found,
+ * or `limit` entries leave no room for the tail in the block.
+ */
+static bool ext4_dir_dx_csum_verify(struct ext4_inode_ref *inode_ref,
+				    struct ext4_dir_en *de)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int block_size = ext4_sb_get_block_size(sb);
+	int coff, limit, cnt;
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		struct ext4_dir_idx_climit *climit;
+		climit = ext4_dir_dx_get_climit(inode_ref, de, &coff);
+		if (!climit) {
+			/* Directory seems corrupted. */
+			return true;
+		}
+		struct ext4_dir_idx_tail *t;
+		limit = ext4_dir_dx_climit_get_limit(climit);
+		cnt = ext4_dir_dx_climit_get_count(climit);
+		if (coff + (limit * sizeof(struct ext4_dir_idx_entry)) >
+		    (block_size - sizeof(struct ext4_dir_idx_tail))) {
+			/* There is no space to hold the checksum */
+			return true;
+		}
+		/* The tail sits right after the `limit` entry slots */
+		t = (void *)(((struct ext4_dir_idx_entry *)climit) + limit);
+		u32int c;
+		c = to_le32(ext4_dir_dx_checksum(inode_ref, de, coff, cnt, t));
+		if (t->checksum != c)
+			return false;
+	}
+	return true;
+}

+/*
+ * Recompute and store the checksum in the tail of the index node whose
+ * block starts at `dirent`.  Does nothing when the metadata-checksum
+ * feature is off, when no count/limit header is found, or when `limit`
+ * entries leave no room for the tail in the block.
+ */
+static void ext4_dir_set_dx_csum(struct ext4_inode_ref *inode_ref,
+				 struct ext4_dir_en *dirent)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int bsize = ext4_sb_get_block_size(sb);
+	struct ext4_dir_idx_climit *cl;
+	struct ext4_dir_idx_tail *tail;
+	int off, lim, cnt;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return;
+
+	cl = ext4_dir_dx_get_climit(inode_ref, dirent, &off);
+	if (cl == nil)
+		return;	/* Directory seems corrupted. */
+
+	lim = ext4_dir_dx_climit_get_limit(cl);
+	cnt = ext4_dir_dx_climit_get_count(cl);
+	if (off + (lim * sizeof(struct ext4_dir_idx_entry)) >
+	    (bsize - sizeof(struct ext4_dir_idx_tail)))
+		return;	/* There is no space to hold the checksum */
+
+	/* The tail sits right after the `lim` entry slots */
+	tail = (void *)(((struct ext4_dir_idx_entry *)cl) + lim);
+	tail->checksum = to_le32(ext4_dir_dx_checksum(inode_ref, dirent,
+				off, cnt, tail));
+}

+/****************************************************************************/

+/*
+ * Initialize the directory index of `dir`: logical block 0 becomes the
+ * index root (holding the "." and ".." entries, the root info and a single
+ * index entry) and logical block 1 becomes an empty leaf that the index
+ * entry points to.  `parent` is the inode that ".." refers to.
+ *
+ * The leaf's inode field is zeroed before its checksum is computed: the
+ * block is obtained without being read, so a stale value there would
+ * otherwise leave the stored checksum not matching the final contents.
+ *
+ * Returns 0 on success or the error of the failing helper.
+ */
+int ext4_dir_dx_init(struct ext4_inode_ref *dir, struct ext4_inode_ref *parent)
+{
+	/* Load block 0, where will be index root located */
+	ext4_fsblk_t fblock;
+	u32int iblock = 0;
+	/*
+	 * NOTE(review): this compares the inode size as returned by
+	 * ext4_inode_get_size() with EXT4_DIR_DX_INIT_BCNT, whose name
+	 * suggests a block count - confirm the units match.
+	 */
+	bool need_append =
+		(ext4_inode_get_size(&dir->fs->sb, dir->inode)
+			< EXT4_DIR_DX_INIT_BCNT)
+		? true : false;
+	struct ext4_sblock *sb = &dir->fs->sb;
+	u32int block_size = ext4_sb_get_block_size(&dir->fs->sb);
+	struct ext4_block block;
+	int rc;
+	if (!need_append)
+		rc = ext4_fs_init_inode_dblk_idx(dir, iblock, &fblock);
+	else
+		rc = ext4_fs_append_inode_dblk(dir, &fblock, &iblock);
+	if (rc != 0)
+		return rc;
+	rc = ext4_trans_block_get_noread(dir->fs->bdev, &block, fblock);
+	if (rc != 0)
+		return rc;
+	/* Initialize pointers to data structures */
+	struct ext4_dir_idx_root *root = (void *)block.data;
+	struct ext4_dir_idx_rinfo *info = &(root->info);
+	memset(root, 0, sizeof(struct ext4_dir_idx_root));
+	struct ext4_dir_en *de;
+	/* Initialize dot entries */
+	de = (struct ext4_dir_en *)root->dots;
+	ext4_dir_write_entry(sb, de, 12, dir, ".", strlen("."));
+	de = (struct ext4_dir_en *)(root->dots + 1);
+	u16int elen = block_size - 12;
+	ext4_dir_write_entry(sb, de, elen, parent, "..", strlen(".."));
+	/* Initialize root info structure */
+	u8int hash_version = ext4_get8(&dir->fs->sb, default_hash_version);
+	ext4_dir_dx_rinfo_set_hash_version(info, hash_version);
+	ext4_dir_dx_rinfo_set_indirect_levels(info, 0);
+	ext4_dir_dx_root_info_set_info_length(info, 8);
+	/* Set limit and current number of entries */
+	struct ext4_dir_idx_climit *climit;
+	climit = (struct ext4_dir_idx_climit *)root->en;
+	ext4_dir_dx_climit_set_count(climit, 1);
+	u32int entry_space;
+	entry_space = block_size - 2 * sizeof(struct ext4_dir_idx_dot_en) -
+			sizeof(struct ext4_dir_idx_rinfo);
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		entry_space -= sizeof(struct ext4_dir_idx_tail);
+	u16int root_limit = entry_space / sizeof(struct ext4_dir_idx_entry);
+	ext4_dir_dx_climit_set_limit(climit, root_limit);
+	/* Append new block, where will be new entries inserted in the future */
+	iblock++;
+	if (!need_append)
+		rc = ext4_fs_init_inode_dblk_idx(dir, iblock, &fblock);
+	else
+		rc = ext4_fs_append_inode_dblk(dir, &fblock, &iblock);
+	if (rc != 0) {
+		ext4_block_set(dir->fs->bdev, &block);
+		return rc;
+	}
+	struct ext4_block new_block;
+	rc = ext4_trans_block_get_noread(dir->fs->bdev, &new_block, fblock);
+	if (rc != 0) {
+		ext4_block_set(dir->fs->bdev, &block);
+		return rc;
+	}
+	/* Fill the whole block with empty entry */
+	struct ext4_dir_en *be = (void *)new_block.data;
+	/* Mark the entry unused first so the checksum below covers it */
+	ext4_dir_en_set_inode(be, 0);
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		u16int len = block_size - sizeof(struct ext4_dir_entry_tail);
+		ext4_dir_en_set_entry_len(be, len);
+		ext4_dir_en_set_name_len(sb, be, 0);
+		ext4_dir_en_set_inode_type(sb, be, EXT4_DE_UNKNOWN);
+		ext4_dir_init_entry_tail(EXT4_DIRENT_TAIL(be, block_size));
+		ext4_dir_set_csum(dir, be);
+	} else {
+		ext4_dir_en_set_entry_len(be, block_size);
+	}
+	ext4_trans_set_block_dirty(new_block.buf);
+	rc = ext4_block_set(dir->fs->bdev, &new_block);
+	if (rc != 0) {
+		ext4_block_set(dir->fs->bdev, &block);
+		return rc;
+	}
+	/* Connect new block to the only entry in index */
+	struct ext4_dir_idx_entry *entry = root->en;
+	ext4_dir_dx_entry_set_block(entry, iblock);
+	ext4_dir_set_dx_csum(dir, (struct ext4_dir_en *)block.data);
+	ext4_trans_set_block_dirty(block.buf);
+	return ext4_block_set(dir->fs->bdev, &block);
+}

+/**@brief Initialize hash info structure necessary for index operations.
+ *        The index root is validated first: the hash version must be
+ *        legacy, half-MD4 or TEA, unused_flags must be 0, indirect_levels
+ *        must be at most 1, and the stored entry limit must equal the
+ *        number of index entries that fit in the root block.
+ *        When the superblock has the unsigned-hash flag, the hash version
+ *        is shifted by 3 to select the unsigned variant.
+ * @param hinfo      Pointer to hinfo to be initialized
+ * @param root_block Root block (number 0) of index
+ * @param sb         Pointer to superblock
+ * @param name_len   Length of name to be computed hash value from
+ * @param name       Name to be computed hash value from; if nil, no hash
+ *                   is computed
+ * @return 0 on success, EXT4_ERR_BAD_DX_DIR if the root fails validation,
+ *         otherwise the result of hashing the name
+ */
+static int ext4_dir_hinfo_init(struct ext4_hash_info *hinfo,
+			       struct ext4_block *root_block,
+			       struct ext4_sblock *sb, usize name_len,
+			       const char *name)
+{
+	struct ext4_dir_idx_root *root;
+	root = (struct ext4_dir_idx_root *)root_block->data;
+	if ((root->info.hash_version != EXT2_HTREE_LEGACY) &&
+	    (root->info.hash_version != EXT2_HTREE_HALF_MD4) &&
+	    (root->info.hash_version != EXT2_HTREE_TEA))
+		return EXT4_ERR_BAD_DX_DIR;
+	/* Check unused flags */
+	if (root->info.unused_flags != 0)
+		return EXT4_ERR_BAD_DX_DIR;
+	/* Check indirect levels */
+	if (root->info.indirect_levels > 1)
+		return EXT4_ERR_BAD_DX_DIR;
+	/* Check if node limit is correct */
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u32int entry_space = block_size;
+	entry_space -= 2 * sizeof(struct ext4_dir_idx_dot_en);
+	entry_space -= sizeof(struct ext4_dir_idx_rinfo);
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		entry_space -= sizeof(struct ext4_dir_idx_tail);
+	entry_space = entry_space / sizeof(struct ext4_dir_idx_entry);
+	struct ext4_dir_idx_climit *climit = (void *)root->en;
+	u16int limit = ext4_dir_dx_climit_get_limit(climit);
+	if (limit != entry_space)
+		return EXT4_ERR_BAD_DX_DIR;
+	/* Check hash version and modify if necessary */
+	hinfo->hash_version = ext4_dir_dx_rinfo_get_hash_version(&root->info);
+	if ((hinfo->hash_version <= EXT2_HTREE_TEA) &&
+	    (ext4_sb_check_flag(sb, EXT4_SUPERBLOCK_FLAGS_UNSIGNED_HASH))) {
+		/* Use unsigned hash */
+		hinfo->hash_version += 3;
+	}
+	/* Load hash seed from superblock */
+	hinfo->seed = ext4_get8(sb, hash_seed);
+	/* Compute hash value of name */
+	if (name)
+		return ext4_dir_dx_hash_string(hinfo, name_len, name);
+	return 0;
+}

+/**@brief Walk through index tree and load leaf with corresponding hash value.

+ *        Each visited index node is recorded in dx_blocks together with

+ *        the entry chosen inside it.

+ * @param hinfo      Initialized hash info structure

+ * @param inode_ref  Current i-node

+ * @param root_block Root block (iblock 0), where is root node located

+ * @param dx_block   Output: pointer to the leaf node in dx_blocks array

+ * @param dx_blocks  Array with the whole path from root to leaf

+ * @return 0 on success, EXT4_ERR_BAD_DX_DIR when a node's count or limit

+ *         is invalid, otherwise the error from block lookup or loading

+ *

+ * NOTE(review): tmp_blk always points at the caller's root_block, so child

+ * nodes are loaded into that same structure after its previous contents

+ * were copied into the path. Callers that release root_block after a

+ * failure may therefore not be releasing the root itself - confirm.

+ */

+static int ext4_dir_dx_get_leaf(struct ext4_hash_info *hinfo,

+				struct ext4_inode_ref *inode_ref,

+				struct ext4_block *root_block,

+				struct ext4_dir_idx_block **dx_block,

+				struct ext4_dir_idx_block *dx_blocks)

+{

+	struct ext4_dir_idx_root *root;

+	struct ext4_dir_idx_entry *entries;

+	struct ext4_dir_idx_entry *p;

+	struct ext4_dir_idx_entry *q;

+	struct ext4_dir_idx_entry *m;

+	struct ext4_dir_idx_entry *at;

+	ext4_fsblk_t fblk;

+	u32int block_size;

+	u16int limit;

+	u16int entry_space;

+	u8int ind_level;

+	int r;

+	struct ext4_dir_idx_block *tmp_dx_blk = dx_blocks; /* next free slot of the path */

+	struct ext4_block *tmp_blk = root_block; /* block of the node being examined */

+	struct ext4_sblock *sb = &inode_ref->fs->sb;

+	block_size = ext4_sb_get_block_size(sb);

+	root = (struct ext4_dir_idx_root *)root_block->data;

+	entries = (struct ext4_dir_idx_entry *)root->en;

+	limit = ext4_dir_dx_climit_get_limit((void *)entries);

+	ind_level = ext4_dir_dx_rinfo_get_indirect_levels(&root->info);

+	/* Walk through the index tree */

+	while (true) {

+		u16int cnt = ext4_dir_dx_climit_get_count((void *)entries);

+		if ((cnt == 0) || (cnt > limit))

+			return EXT4_ERR_BAD_DX_DIR;

+		/* Do binary search in every node; the hash of entries[0] is

+		 * never compared, the search covers entries[1..cnt-1] */

+		p = entries + 1;

+		q = entries + cnt - 1;

+		while (p <= q) {

+			m = p + (q - p) / 2;

+			if (ext4_dir_dx_entry_get_hash(m) > hinfo->hash)

+				q = m - 1;

+			else

+				p = m + 1;

+		}

+		at = p - 1; /* last entry whose hash is <= hinfo->hash, or entries[0] */

+		/* Write results */

+		memcpy(&tmp_dx_blk->b, tmp_blk, sizeof(struct ext4_block));

+		tmp_dx_blk->entries = entries;

+		tmp_dx_blk->position = at;

+		/* Is algorithm in the leaf? */

+		if (ind_level == 0) {

+			*dx_block = tmp_dx_blk;

+			return 0;

+		}

+		/* Goto child node */

+		u32int n_blk = ext4_dir_dx_entry_get_block(at);

+		ind_level--;

+		r = ext4_fs_get_inode_dblk_idx(inode_ref, n_blk, &fblk, false);

+		if (r != 0)

+			return r;

+		r = ext4_trans_block_get(inode_ref->fs->bdev, tmp_blk, fblk);

+		if (r != 0)

+			return r;

+		entries = ((struct ext4_dir_idx_node *)tmp_blk->data)->entries;

+		limit = ext4_dir_dx_climit_get_limit((void *)entries);

+		entry_space = block_size - sizeof(struct ext4_fake_dir_entry); /* room after the fake entry */

+		if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))

+			entry_space -= sizeof(struct ext4_dir_idx_tail);

+		entry_space = entry_space / sizeof(struct ext4_dir_idx_entry); /* now a count of entries */

+		if (limit != entry_space) {

+			ext4_block_set(inode_ref->fs->bdev, tmp_blk);

+			return EXT4_ERR_BAD_DX_DIR;

+		}

+		if (!ext4_dir_dx_csum_verify(inode_ref, (void *)tmp_blk->data)) { /* mismatch is only logged */

+			ext4_dbg(DEBUG_DIR_IDX,

+					DBG_WARN "HTree checksum failed."

+					"Inode: %ud, "

+					"Block: %ud\n",

+					inode_ref->index,

+					n_blk);

+		}

+		++tmp_dx_blk;

+	}

+	/* Unreachable */

+}

+/**@brief Check if the next block would be checked during entry search.

+ *        Advances the position in the path and, when the search has to

+ *        continue, reloads the lower part of the path.

+ * @param inode_ref Directory i-node

+ * @param hash      Hash value to check

+ * @param dx_block  Current block

+ * @param dx_blocks Array with path from root to leaf node

+ * @return 0 when there is no further block to search,

+ *         EXT4_ERR_NOT_FOUND when the path now points at the next block

+ *         to search, otherwise the error from block lookup or handling

+ */

+static int ext4_dir_dx_next_block(struct ext4_inode_ref *inode_ref,

+				  u32int hash,

+				  struct ext4_dir_idx_block *dx_block,

+				  struct ext4_dir_idx_block *dx_blocks)

+{

+	int r;

+	u32int num_handles = 0; /* number of path levels that have to be reloaded */

+	ext4_fsblk_t blk_adr;

+	struct ext4_dir_idx_block *p = dx_block;

+	/* Try to find data block with next bunch of entries: step to the

+	 * next entry, climbing towards the root while a node is exhausted */

+	while (true) {

+		u16int cnt = ext4_dir_dx_climit_get_count((void *)p->entries);

+		p->position++;

+		if (p->position < p->entries + cnt)

+			break;

+		if (p == dx_blocks) /* the root is exhausted too */

+			return 0;

+		num_handles++;

+		p--;

+	}

+	/* Check hash collision: when the low bit of hash is clear, the next

+	 * block is only used if its hash, low bit masked off, equals

+	 * hash */

+	u32int current_hash = ext4_dir_dx_entry_get_hash(p->position);

+	if ((hash & 1) == 0) {

+		if ((current_hash & ~1) != hash)

+			return 0;

+	}

+	/* Fill new path */

+	while (num_handles--) {

+		u32int blk = ext4_dir_dx_entry_get_block(p->position);

+		r = ext4_fs_get_inode_dblk_idx(inode_ref, blk, &blk_adr, false);

+		if (r != 0)

+			return r;

+		struct ext4_block b;

+		r = ext4_trans_block_get(inode_ref->fs->bdev, &b, blk_adr);

+		if (r != 0)

+			return r;

+		if (!ext4_dir_dx_csum_verify(inode_ref, (void *)b.data)) { /* mismatch is only logged */

+			ext4_dbg(DEBUG_DIR_IDX,

+					DBG_WARN "HTree checksum failed."

+					"Inode: %ud, "

+					"Block: %ud\n",

+					inode_ref->index,

+					blk);

+		}

+		p++;

+		/* Don't forget to put old block (prevent memory leak) */

+		r = ext4_block_set(inode_ref->fs->bdev, &p->b);

+		if (r != 0)

+			return r;

+		memcpy(&p->b, &b, sizeof(b));

+		p->entries = ((struct ext4_dir_idx_node *)b.data)->entries;

+		p->position = p->entries; /* restart at the first entry of the new node */

+	}

+	return EXT4_ERR_NOT_FOUND;

+}

+/**@brief Look up a name in an indexed (HTree) directory.

+ * @param result    On success receives the block holding the entry and

+ *                  the entry itself; that block is not released here

+ * @param inode_ref Directory i-node

+ * @param name_len  Length of the name

+ * @param name      Name to look up

+ * @return 0 when the entry was found, EXT4_ERR_NOT_FOUND when no

+ *         candidate leaf block holds the name, EXT4_ERR_BAD_DX_DIR when

+ *         the index is invalid, otherwise an error from block handling

+ */

+int ext4_dir_dx_find_entry(struct ext4_dir_search_result *result,

+			   struct ext4_inode_ref *inode_ref, usize name_len,

+			   const char *name)

+{

+	/* Load direct block 0 (index root) */

+	ext4_fsblk_t root_block_addr;

+	int rc2;

+	int rc;

+	rc = ext4_fs_get_inode_dblk_idx(inode_ref,  0, &root_block_addr, false);

+	if (rc != 0)

+		return rc;

+	struct ext4_fs *fs = inode_ref->fs;

+	struct ext4_block root_block;

+	rc = ext4_trans_block_get(fs->bdev, &root_block, root_block_addr);

+	if (rc != 0)

+		return rc;

+	/* A checksum mismatch is only logged, the lookup continues */

+	if (!ext4_dir_dx_csum_verify(inode_ref, (void *)root_block.data)) {

+		ext4_dbg(DEBUG_DIR_IDX,

+			 DBG_WARN "HTree root checksum failed."

+			 "Inode: %ud, "

+			 "Block: %ud\n",

+			 inode_ref->index,

+			 (u32int)0);

+	}

+	/* Initialize hash info (compute hash value) */

+	struct ext4_hash_info hinfo;

+	rc = ext4_dir_hinfo_init(&hinfo, &root_block, &fs->sb, name_len, name);

+	if (rc != 0) {

+		ext4_block_set(fs->bdev, &root_block);

+		return EXT4_ERR_BAD_DX_DIR;

+	}

+	/*

+	 * Hardcoded number 2 means maximum height of index tree,

+	 * specified in the Linux driver.

+	 */

+	struct ext4_dir_idx_block dx_blocks[2];

+	struct ext4_dir_idx_block *dx_block;

+	struct ext4_dir_idx_block *tmp;

+	rc = ext4_dir_dx_get_leaf(&hinfo, inode_ref, &root_block, &dx_block,

+				  dx_blocks);

+	if (rc != 0) {

+		ext4_block_set(fs->bdev, &root_block);

+		return EXT4_ERR_BAD_DX_DIR;

+	}

+	for (;;) {

+		/* Load leaf block */

+		u32int leaf_blk_idx;

+		ext4_fsblk_t leaf_block_addr;

+		struct ext4_block b;

+		leaf_blk_idx = ext4_dir_dx_entry_get_block(dx_block->position);

+		rc = ext4_fs_get_inode_dblk_idx(inode_ref, leaf_blk_idx,

+						&leaf_block_addr, false);

+		if (rc != 0)

+			break;

+		rc = ext4_trans_block_get(fs->bdev, &b, leaf_block_addr);

+		if (rc != 0)

+			break;

+		if (!ext4_dir_csum_verify(inode_ref, (void *)b.data)) {

+			ext4_dbg(DEBUG_DIR_IDX,

+				 DBG_WARN "HTree leaf block checksum failed."

+				 "Inode: %ud, "

+				 "Block: %ud\n",

+				 inode_ref->index,

+				 leaf_blk_idx);

+		}

+		/* Linear search inside block */

+		struct ext4_dir_en *de;

+		rc = ext4_dir_find_in_block(&b, &fs->sb, name_len, name, &de);

+		/* Found => return it (the block stays referenced via result) */

+		if (rc == 0) {

+			result->block = b;

+			result->dentry = de;

+			break;

+		}

+		/* Not found, leave untouched */

+		rc2 = ext4_block_set(fs->bdev, &b);

+		if (rc2 != 0)

+			break;

+		if (rc != EXT4_ERR_NOT_FOUND)

+			break;

+		/* check if the next block could be checked */

+		rc = ext4_dir_dx_next_block(inode_ref, hinfo.hash, dx_block, &dx_blocks[0]);

+		/* EXT4_ERR_NOT_FOUND here means the path now points at the

+		 * next candidate block: keep searching */

+		if (rc == EXT4_ERR_NOT_FOUND)

+			continue;

+		/*

+		 * 0 means there is no further candidate block, so the name

+		 * does not exist. Report that instead of looping again with

+		 * a position that may lie past the last valid index entry.

+		 */

+		if (rc == 0)

+			rc = EXT4_ERR_NOT_FOUND;

+		break;

+	}

+	/* The whole path must be released (preventing memory leak) */

+	tmp = dx_blocks;

+	while (tmp <= dx_block) {

+		rc2 = ext4_block_set(fs->bdev, &tmp->b);

+		if (rc == 0 && rc2 != 0)

+			rc = rc2;

+		++tmp;

+	}

+	return rc;

+}

+/**@brief  Ordering callback handed to qsort: orders two sort entries

+ *         by their hash value.

+ * @param arg1  First entry (struct ext4_dx_sort_entry)

+ * @param arg2  Second entry (struct ext4_dx_sort_entry)

+ *

+ * @return -1 when arg1 sorts before arg2, 1 when it sorts after,

+ *         0 when both hashes are equal

+ */

+static int ext4_dir_dx_entry_comparator(const void *arg1, const void *arg2)

+{

+	struct ext4_dx_sort_entry *lhs = (void *)arg1;

+	struct ext4_dx_sort_entry *rhs = (void *)arg2;

+	if (lhs->hash < rhs->hash)

+		return -1;

+	return (lhs->hash == rhs->hash) ? 0 : 1;

+}

+/**@brief  Insert a new index entry right after the current position

+ *         of an index block.

+ *         The caller must have made sure the node has a free slot.

+ * @param inode_ref   Directory i-node

+ * @param index_block Index block (with position) receiving the entry

+ * @param hash        Hash value covered by the child node

+ * @param iblock      Logical number of the child block

+ *

+ */

+static void

+ext4_dir_dx_insert_entry(struct ext4_inode_ref *inode_ref,

+			 struct ext4_dir_idx_block *index_block,

+			 u32int hash, u32int iblock)

+{

+	struct ext4_dir_idx_entry *first = index_block->entries;

+	struct ext4_dir_idx_entry *slot = index_block->position + 1;

+	struct ext4_dir_idx_climit *cl = (void *)first;

+	u32int used = ext4_dir_dx_climit_get_count(cl);

+	/* Bytes occupied by the entries from the slot up to the last one */

+	usize tail_bytes = (u8int *)(first + used) - (u8int *)slot;

+	/* Shift those entries up by one to open the slot */

+	memmove(slot + 1, slot, tail_bytes);

+	/* Fill the slot and account for it */

+	ext4_dir_dx_entry_set_block(slot, iblock);

+	ext4_dir_dx_entry_set_hash(slot, hash);

+	ext4_dir_dx_climit_set_count(cl, used + 1);

+	/* Refresh the node checksum and mark the block as modified */

+	ext4_dir_set_dx_csum(inode_ref, (void *)index_block->b.data);

+	ext4_trans_set_block_dirty(index_block->b.buf);

+}

+/**@brief Split directory entries to two parts preventing node overflow.

+ *        The valid entries of the old block are sorted by hash; the lower

+ *        part is written back to the old block, the upper part to a newly

+ *        appended block, and an index entry for the new block is inserted.

+ * @param inode_ref      Directory i-node

+ * @param hinfo          Hash info

+ * @param old_data_block Block with data to be split

+ * @param index_block    Block where index entries are located

+ * @param new_data_block Output value for newly allocated data block;

+ *                       only written on success and not released here

+ * @return 0 on success, -1 (with the error string set) when out of

+ *         memory, otherwise the error from hashing or block handling

+ */

+static int ext4_dir_dx_split_data(struct ext4_inode_ref *inode_ref,

+				  struct ext4_hash_info *hinfo,

+				  struct ext4_block *old_data_block,

+				  struct ext4_dir_idx_block *index_block,

+				  struct ext4_block *new_data_block)

+{

+	int rc;

+	struct ext4_sblock *sb = &inode_ref->fs->sb;

+	u32int block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);

+	/* Allocate buffer for directory entries */

+	u8int *entry_buffer = ext4_malloc(block_size);

+	if (entry_buffer == nil) {

+		werrstr(Enomem);

+		return -1;

+	}

+	/* dot entry has the smallest size available */

+	u32int max_ecnt = block_size / sizeof(struct ext4_dir_idx_dot_en);

+	/* Allocate sort entry */

+	struct ext4_dx_sort_entry *sort;

+	sort = ext4_malloc(max_ecnt * sizeof(struct ext4_dx_sort_entry));

+	if (sort == nil) {

+		ext4_free(entry_buffer);

+		werrstr(Enomem);

+		return -1;

+	}

+	u32int idx = 0;

+	u32int real_size = 0;

+	/* Work on a copy of hinfo so the caller's hash is not overwritten */

+	struct ext4_hash_info hinfo_tmp;

+	memcpy(&hinfo_tmp, hinfo, sizeof(struct ext4_hash_info));

+	/* Load all valid entries to the buffer */

+	struct ext4_dir_en *de = (void *)old_data_block->data;

+	u8int *entry_buffer_ptr = entry_buffer;

+	while ((void *)de < (void *)(old_data_block->data + block_size)) {

+		/* Read only valid entries */

+		if (ext4_dir_en_get_inode(de) && de->name_len) {

+			u16int len = ext4_dir_en_get_name_len(sb, de);

+			rc = ext4_dir_dx_hash_string(&hinfo_tmp, len,

+						     (char *)de->name);

+			if (rc != 0) {

+				ext4_free(sort);

+				ext4_free(entry_buffer);

+				return rc;

+			}

+			/* 8 bytes of entry header plus the name, rounded

+			 * up to a multiple of 4 */

+			u32int rec_len = 8 + len;

+			if ((rec_len % 4) != 0)

+				rec_len += 4 - (rec_len % 4);

+			memcpy(entry_buffer_ptr, de, rec_len);

+			sort[idx].dentry = entry_buffer_ptr;

+			sort[idx].rec_len = rec_len;

+			sort[idx].hash = hinfo_tmp.hash;

+			entry_buffer_ptr += rec_len;

+			real_size += rec_len;

+			idx++;

+		}

+		usize elen = ext4_dir_en_get_entry_len(de);

+		de = (void *)((u8int *)de + elen);

+	}

+	qsort(sort, idx, sizeof(struct ext4_dx_sort_entry),

+	      ext4_dir_dx_entry_comparator);

+	/* Allocate new block for store the second part of entries */

+	ext4_fsblk_t new_fblock;

+	u32int new_iblock;

+	rc = ext4_fs_append_inode_dblk(inode_ref, &new_fblock, &new_iblock);

+	if (rc != 0) {

+		ext4_free(sort);

+		ext4_free(entry_buffer);

+		return rc;

+	}

+	/* Load new block */

+	struct ext4_block new_data_block_tmp;

+	rc = ext4_trans_block_get_noread(inode_ref->fs->bdev, &new_data_block_tmp,

+				   new_fblock);

+	if (rc != 0) {

+		ext4_free(sort);

+		ext4_free(entry_buffer);

+		return rc;

+	}

+	/*

+	 * Distribute entries to two blocks (by size)

+	 * - compute the half

+	 */

+	u32int new_hash = 0;

+	u32int current_size = 0;

+	u32int mid = 0;

+	u32int i;

+	for (i = 0; i < idx; ++i) {

+		if ((current_size + sort[i].rec_len) > (block_size / 2)) {

+			new_hash = sort[i].hash;

+			mid = i;

+			break;

+		}

+		current_size += sort[i].rec_len;

+	}

+	/*

+	 * Check hash collision: when the first entry of the upper part has

+	 * the same hash as the last entry of the lower part, the index entry

+	 * gets hash + 1. mid stays 0 when the loop above never breaks (or

+	 * when there are no entries); sort[mid - 1] must not be read then.

+	 * NOTE(review): with mid == 0 the old block is not rewritten below

+	 * while every entry is copied to the new block - confirm that case

+	 * cannot occur or handle it.

+	 */

+	u32int continued = 0;

+	if ((mid > 0) && (new_hash == sort[mid - 1].hash))

+		continued = 1;

+	u32int off = 0;

+	void *ptr;

+	/* With metadata checksums the end of each block is kept for the tail */

+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))

+		block_size -= sizeof(struct ext4_dir_entry_tail);

+	/* First part - to the old block; the last entry spans the rest */

+	for (i = 0; i < mid; ++i) {

+		ptr = old_data_block->data + off;

+		memcpy(ptr, sort[i].dentry, sort[i].rec_len);

+		struct ext4_dir_en *t = ptr;

+		if (i < (mid - 1))

+			ext4_dir_en_set_entry_len(t, sort[i].rec_len);

+		else

+			ext4_dir_en_set_entry_len(t, block_size - off);

+		off += sort[i].rec_len;

+	}

+	/* Second part - to the new block; the last entry spans the rest */

+	off = 0;

+	for (i = mid; i < idx; ++i) {

+		ptr = new_data_block_tmp.data + off;

+		memcpy(ptr, sort[i].dentry, sort[i].rec_len);

+		struct ext4_dir_en *t = ptr;

+		if (i < (idx - 1))

+			ext4_dir_en_set_entry_len(t, sort[i].rec_len);

+		else

+			ext4_dir_en_set_entry_len(t, block_size - off);

+		off += sort[i].rec_len;

+	}

+	/* Back to the full block size to locate the tails */

+	block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);

+	/* Do some steps to finish operation */

+	sb = &inode_ref->fs->sb;

+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {

+		struct ext4_dir_entry_tail *t;

+		t = EXT4_DIRENT_TAIL(old_data_block->data, block_size);

+		ext4_dir_init_entry_tail(t);

+		t = EXT4_DIRENT_TAIL(new_data_block_tmp.data, block_size);

+		ext4_dir_init_entry_tail(t);

+	}

+	ext4_dir_set_csum(inode_ref, (void *)old_data_block->data);

+	ext4_dir_set_csum(inode_ref, (void *)new_data_block_tmp.data);

+	ext4_trans_set_block_dirty(old_data_block->buf);

+	ext4_trans_set_block_dirty(new_data_block_tmp.buf);

+	ext4_free(sort);

+	ext4_free(entry_buffer);

+	/* Register the new block in the index */

+	ext4_dir_dx_insert_entry(inode_ref, index_block, new_hash + continued,

+				new_iblock);

+	*new_data_block = new_data_block_tmp;

+	return 0;

+}

+/**@brief  Split a full index node so that one more index entry fits.

+ *         A full root is pushed down into a new child node (the tree

+ *         gains a level); a full second-level node is split in two and

+ *         the new half is registered in the root.

+ * @param ino_ref      Directory i-node

+ * @param dx_blks      Array with path from root to leaf node

+ * @param dxb          Last index node of the path, split if it is full

+ * @param new_dx_block Output: set to the new last node of the path when

+ *                     the root was pushed down; untouched otherwise

+ * @return 0 on success (also when no split was needed), -1 with the

+ *         error string set when the index cannot grow any further,

+ *         otherwise the error from block allocation or handling

+ */

+static int

+ext4_dir_dx_split_index(struct ext4_inode_ref *ino_ref,

+			struct ext4_dir_idx_block *dx_blks,

+			struct ext4_dir_idx_block *dxb,

+			struct ext4_dir_idx_block **new_dx_block)

+{

+	struct ext4_sblock *sb = &ino_ref->fs->sb;

+	struct ext4_dir_idx_entry *e;

+	int r;

+	u32int block_size = ext4_sb_get_block_size(&ino_ref->fs->sb);

+	bool meta_csum = ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM);

+	/*

+	 * Capacity of a non-root index node. The checksum tail has to be

+	 * subtracted before dividing, otherwise the limit written to a new

+	 * node is too large and disagrees with the limit check done in

+	 * ext4_dir_dx_get_leaf.

+	 */

+	u32int entry_space = block_size - sizeof(struct ext4_fake_dir_entry);

+	if (meta_csum)

+		entry_space -= sizeof(struct ext4_dir_idx_tail);

+	u32int node_limit =  entry_space / sizeof(struct ext4_dir_idx_entry);

+	if (dxb == dx_blks)

+		e = ((struct ext4_dir_idx_root *)dxb->b.data)->en;

+	else

+		e = ((struct ext4_dir_idx_node *)dxb->b.data)->entries;

+	struct ext4_dir_idx_climit *climit = (struct ext4_dir_idx_climit *)e;

+	u16int leaf_limit = ext4_dir_dx_climit_get_limit(climit);

+	u16int leaf_count = ext4_dir_dx_climit_get_count(climit);

+	/* Check if is necessary to split index block */

+	if (leaf_limit == leaf_count) {

+		struct ext4_dir_idx_entry *ren;

+		intptr levels = dxb - dx_blks; /* 0 when dxb is the root */

+		ren = ((struct ext4_dir_idx_root *)dx_blks[0].b.data)->en;

+		struct ext4_dir_idx_climit *rclimit = (void *)ren;

+		u16int root_limit = ext4_dir_dx_climit_get_limit(rclimit);

+		u16int root_count = ext4_dir_dx_climit_get_count(rclimit);

+		/* Linux limitation: a full root above a full node cannot grow */

+		if ((levels > 0) && (root_limit == root_count)) {

+			werrstr(Enospc);

+			return -1;

+		}

+		/* Add new block to directory */

+		ext4_fsblk_t new_fblk;

+		u32int new_iblk;

+		r = ext4_fs_append_inode_dblk(ino_ref, &new_fblk, &new_iblk);

+		if (r != 0)

+			return r;

+		/* load new block */

+		struct ext4_block b;

+		r = ext4_trans_block_get_noread(ino_ref->fs->bdev, &b, new_fblk);

+		if (r != 0)

+			return r;

+		struct ext4_dir_idx_node *new_node = (void *)b.data;

+		struct ext4_dir_idx_entry *new_en = new_node->entries;

+		/* The fake directory entry spans the whole block */

+		memset(&new_node->fake, 0, sizeof(struct ext4_fake_dir_entry));

+		new_node->fake.entry_length = block_size;

+		/* Split leaf node */

+		if (levels > 0) {

+			u32int count_left = leaf_count / 2;

+			u32int count_right = leaf_count - count_left;

+			u32int hash_right;

+			usize sz;

+			struct ext4_dir_idx_climit *left_climit;

+			struct ext4_dir_idx_climit *right_climit;

+			hash_right = ext4_dir_dx_entry_get_hash(e + count_left);

+			/* Copy data to new node */

+			sz = count_right * sizeof(struct ext4_dir_idx_entry);

+			memcpy(new_en, e + count_left, sz);

+			/* Initialize new node */

+			left_climit = (struct ext4_dir_idx_climit *)e;

+			right_climit = (struct ext4_dir_idx_climit *)new_en;

+			ext4_dir_dx_climit_set_count(left_climit, count_left);

+			ext4_dir_dx_climit_set_count(right_climit, count_right);

+			ext4_dir_dx_climit_set_limit(right_climit, node_limit);

+			/* Which index block is target for new entry */

+			u32int position_index =

+			    (dxb->position - dxb->entries);

+			if (position_index >= count_left) {

+				/* The position moved to the new node: make

+				 * it the path node, keep the old one in b */

+				ext4_dir_set_dx_csum(

+						ino_ref,

+						(struct ext4_dir_en *)

+						dxb->b.data);

+				ext4_trans_set_block_dirty(dxb->b.buf);

+				struct ext4_block block_tmp = dxb->b;

+				dxb->b = b;

+				dxb->position =

+				    new_en + position_index - count_left;

+				dxb->entries = new_en;

+				b = block_tmp;

+			}

+			/* Finally insert new entry */

+			ext4_dir_dx_insert_entry(ino_ref, dx_blks, hash_right,

+						 new_iblk);

+			ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[0].b.data);

+			ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[1].b.data);

+			ext4_trans_set_block_dirty(dx_blks[0].b.buf);

+			ext4_trans_set_block_dirty(dx_blks[1].b.buf);

+			/* b is the half that is not part of the path */

+			ext4_dir_set_dx_csum(ino_ref, (void *)b.data);

+			ext4_trans_set_block_dirty(b.buf);

+			return ext4_block_set(ino_ref->fs->bdev, &b);

+		} else {

+			usize sz;

+			/* Copy data from root to child block */

+			sz = leaf_count * sizeof(struct ext4_dir_idx_entry);

+			memcpy(new_en, e, sz);

+			struct ext4_dir_idx_climit *new_climit = (void*)new_en;

+			ext4_dir_dx_climit_set_limit(new_climit, node_limit);

+			/* Set values in root node */

+			struct ext4_dir_idx_climit *new_root_climit = (void *)e;

+			ext4_dir_dx_climit_set_count(new_root_climit, 1);

+			ext4_dir_dx_entry_set_block(e, new_iblk);

+			struct ext4_dir_idx_root *root = (void *)dx_blks[0].b.data;

+			root->info.indirect_levels = 1;

+			/* Add new entry to the path */

+			dxb = dx_blks + 1;

+			dxb->position = dx_blks->position - e + new_en;

+			dxb->entries = new_en;

+			dxb->b = b;

+			*new_dx_block = dxb;

+			ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[0].b.data);

+			ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[1].b.data);

+			ext4_trans_set_block_dirty(dx_blks[0].b.buf);

+			ext4_trans_set_block_dirty(dx_blks[1].b.buf);

+		}

+	}

+	return 0;

+}

+/**@brief Add a new entry to an indexed (HTree) directory.

+ *        Tries the leaf block selected by the name's hash first; when it

+ *        is full the block is split and the entry goes to the proper half.

+ * @param parent   Directory i-node

+ * @param child    I-node the new entry refers to

+ * @param name     Name of the new entry

+ * @param name_len Length of the name

+ * @return 0 on success, EXT4_ERR_BAD_DX_DIR when the index is invalid,

+ *         otherwise the error from the failing step

+ */

+int ext4_dir_dx_add_entry(struct ext4_inode_ref *parent,

+			  struct ext4_inode_ref *child, const char *name, u32int name_len)

+{

+	int rc2 = 0;

+	int r;

+	/* Get direct block 0 (index root) */

+	ext4_fsblk_t rblock_addr;

+	r =  ext4_fs_get_inode_dblk_idx(parent, 0, &rblock_addr, false);

+	if (r != 0)

+		return r;

+	struct ext4_fs *fs = parent->fs;

+	struct ext4_block root_blk;

+	r = ext4_trans_block_get(fs->bdev, &root_blk, rblock_addr);

+	if (r != 0)

+		return r;

+	/* A checksum mismatch is only logged */

+	if (!ext4_dir_dx_csum_verify(parent, (void*)root_blk.data)) {

+		ext4_dbg(DEBUG_DIR_IDX,

+			 DBG_WARN "HTree root checksum failed."

+			 "Inode: %ud, "

+			 "Block: %ud\n",

+			 parent->index,

+			 (u32int)0);

+	}

+	/* Initialize hinfo structure (mainly compute hash) */

+	struct ext4_hash_info hinfo;

+	r = ext4_dir_hinfo_init(&hinfo, &root_blk, &fs->sb, name_len, name);

+	if (r != 0) {

+		ext4_block_set(fs->bdev, &root_blk);

+		return EXT4_ERR_BAD_DX_DIR;

+	}

+	/*

+	 * Hardcoded number 2 means maximum height of index

+	 * tree defined in Linux.

+	 */

+	struct ext4_dir_idx_block dx_blks[2];

+	struct ext4_dir_idx_block *dx_blk;

+	struct ext4_dir_idx_block *dx_it;

+	r = ext4_dir_dx_get_leaf(&hinfo, parent, &root_blk, &dx_blk, dx_blks);

+	if (r != 0) {

+		/*

+		 * dx_blk is not set when the walk fails, so the path cannot

+		 * be released through release_index. Release the root the

+		 * same way ext4_dir_dx_find_entry does.

+		 */

+		ext4_block_set(fs->bdev, &root_blk);

+		return EXT4_ERR_BAD_DX_DIR;

+	}

+	/* Try to insert to existing data block */

+	u32int leaf_block_idx = ext4_dir_dx_entry_get_block(dx_blk->position);

+	ext4_fsblk_t leaf_block_addr;

+	r = ext4_fs_get_inode_dblk_idx(parent, leaf_block_idx,

+						&leaf_block_addr, false);

+	if (r != 0)

+		goto release_index;

+	/*

+	 * Check if there is needed to split index node

+	 * (and recursively also parent nodes).

+	 * target_block is not loaded yet, so only the index path may be

+	 * released on failure.

+	 */

+	r = ext4_dir_dx_split_index(parent, dx_blks, dx_blk, &dx_blk);

+	if (r != 0)

+		goto release_index;

+	struct ext4_block target_block;

+	r = ext4_trans_block_get(fs->bdev, &target_block, leaf_block_addr);

+	if (r != 0)

+		goto release_index;

+	if (!ext4_dir_csum_verify(parent,(void *)target_block.data)) {

+		ext4_dbg(DEBUG_DIR_IDX,

+				DBG_WARN "HTree leaf block checksum failed."

+				"Inode: %ud, "

+				"Block: %ud\n",

+				parent->index,

+				leaf_block_idx);

+	}

+	/* Check if insert operation passed */

+	r = ext4_dir_try_insert_entry(&fs->sb, parent, &target_block, child,

+					name, name_len);

+	if (r == 0)

+		goto release_target_index;

+	/* Split entries to two blocks (includes sorting by hash value) */

+	struct ext4_block new_block;

+	r = ext4_dir_dx_split_data(parent, &hinfo, &target_block, dx_blk,

+				    &new_block);

+	if (r != 0)

+		goto release_target_index;

+	/* Where to save new entry: the index entry after the current

+	 * position is the one just inserted for the new block */

+	u32int blk_hash = ext4_dir_dx_entry_get_hash(dx_blk->position + 1);

+	if (hinfo.hash >= blk_hash)

+		r = ext4_dir_try_insert_entry(&fs->sb, parent, &new_block,

+						child, name, name_len);

+	else

+		r = ext4_dir_try_insert_entry(&fs->sb, parent, &target_block,

+						child, name, name_len);

+	/* Cleanup: the new block is released whether or not the insert

+	 * worked; the first error is the one reported */

+	rc2 = ext4_block_set(fs->bdev, &new_block);

+	if (r == 0)

+		r = rc2;

+/* Cleanup operations */

+release_target_index:

+	rc2 = r;

+	r = ext4_block_set(fs->bdev, &target_block);

+	if (r != 0)

+		return r;

+release_index:

+	if (r != 0)

+		rc2 = r;

+	/* Release every index node of the path, root first */

+	dx_it = dx_blks;

+	while (dx_it <= dx_blk) {

+		r = ext4_block_set(fs->bdev, &dx_it->b);

+		if (r != 0)

+			return r;

+		dx_it++;

+	}

+	return rc2;

+}

+/**@brief Store a new parent i-node number in the second dot entry

+ *        of a directory's index root.

+ * @param dir          Directory i-node whose index root is updated

+ * @param parent_inode I-node number to store

+ * @return 0 on success, otherwise the error from block lookup or handling

+ */

+int ext4_dir_dx_reset_parent_inode(struct ext4_inode_ref *dir,

+                                   u32int parent_inode)

+{

+	struct ext4_dir_idx_root *root;

+	struct ext4_block blk;

+	ext4_fsblk_t addr;

+	int err;

+	/* The index root lives in logical block 0 of the directory */

+	err = ext4_fs_get_inode_dblk_idx(dir, 0, &addr, false);

+	if (err != 0)

+		return err;

+	err = ext4_trans_block_get(dir->fs->bdev, &blk, addr);

+	if (err != 0)

+		return err;

+	/* A checksum mismatch is only logged */

+	if (!ext4_dir_dx_csum_verify(dir, (void *)blk.data)) {

+		ext4_dbg(DEBUG_DIR_IDX,

+			 DBG_WARN "HTree root checksum failed."

+			 "Inode: %ud, "

+			 "Block: %ud\n",

+			 dir->index,

+			 (u32int)0);

+	}

+	/* Update the second dot entry, then refresh checksum and dirty state */

+	root = (void *)blk.data;

+	ext4_dx_dot_en_set_inode(&root->dots[1], parent_inode);

+	ext4_dir_set_dx_csum(dir, (void *)blk.data);

+	ext4_trans_set_block_dirty(blk.buf);

+	return ext4_block_set(dir->fs->bdev, &blk);

+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_extent.c

@@ -1,0 +1,2218 @@

+#include "ext4_config.h"

+#include "ext4_debug.h"

+#include "ext4_fs.h"

+#include "ext4_trans.h"

+#include "ext4_blockdev.h"

+#include "ext4_extent.h"

+#include "ext4_inode.h"

+#include "ext4_super.h"

+#include "ext4_crc32.h"

+#include "ext4_balloc.h"

+//#define CONFIG_EXTENT_DEBUG_VERBOSE

+/**@brief Read the depth stored in the extent header kept inside the i-node
+ * @param inode_ref I-node reference the tree belongs to
+ * @return Depth of extent tree */
+static inline u16int
+ext4_extent_tree_depth(struct ext4_inode_ref *inode_ref)
+{
+	return ext4_extent_header_get_depth(
+	    ext4_inode_get_extent_header(inode_ref->inode));
+}

+/**@brief Locate the checksum tail that follows an extent node
+ * @param eh Header of the extent node
+ * @return Address EXT4_EXTENT_TAIL_OFFSET(eh) bytes past the header */
+static struct ext4_extent_tail *
+ext4_extent_get_csum_tail(struct ext4_extent_header *eh)
+{
+	char *base = (char *)eh;
+	return (struct ext4_extent_tail *)(base + EXT4_EXTENT_TAIL_OFFSET(eh));
+}

+/**@brief Compute the checksum of an extent node
+ * @param inode_ref I-node the extent tree belongs to
+ * @param eh        Header of the extent node to checksum
+ * @return 0 when the metadata checksum feature is off, otherwise the
+ *         ext4_crc32c() value chained over the filesystem uuid, the
+ *         i-node number, the i-node generation and the node contents
+ *         up to the checksum tail */
+static u32int ext4_extent_block_csum(struct ext4_inode_ref *inode_ref,
+				       struct ext4_extent_header *eh)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int ino_le;
+	u32int gen_le;
+	u32int crc;
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return 0;
+	/* Both numbers are fed to the checksum in little-endian form */
+	ino_le = to_le32(inode_ref->index);
+	gen_le = to_le32(ext4_inode_get_generation(inode_ref->inode));
+	/* Seed with the filesystem uuid ... */
+	crc = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	/* ... mix in the identity of the owning i-node ... */
+	crc = ext4_crc32c(crc, &ino_le, sizeof(ino_le));
+	crc = ext4_crc32c(crc, &gen_le, sizeof(gen_le));
+	/* ... and finish with the node itself, stopping at the tail */
+	return ext4_crc32c(crc, eh, EXT4_EXTENT_TAIL_OFFSET(eh));
+}

+/**@brief Check the checksum stored in the tail of an extent node block
+ * @param inode_ref I-node the extent tree belongs to
+ * @param block     Block buffer holding the extent node
+ * @return false only when the metadata checksum feature is on, the
+ *         node's depth is below the depth of the tree root and the
+ *         stored checksum differs from the computed one */
+static bool
+ext4_extent_verify_block_csum(struct ext4_inode_ref *inode_ref,
+			      struct ext4_block *block)
+{
+	struct ext4_extent_header *eh;
+	if (!ext4_sb_feature_ro_com(&inode_ref->fs->sb,
+				    EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+	eh = (struct ext4_extent_header *)block->data;
+	/* Only nodes strictly below the root depth are compared */
+	if (ext4_extent_header_get_depth(eh) >=
+	    ext4_extent_tree_depth(inode_ref))
+		return true;
+	return ext4_extent_get_csum_tail(eh)->checksum ==
+	    to_le32(ext4_extent_block_csum(inode_ref, eh));
+}

+/**@brief Store a freshly computed checksum in the tail of an extent node
+ * Nothing is written when the metadata checksum feature is off or when
+ * the node's depth is not below the depth of the tree root.
+ * @param inode_ref I-node the extent tree belongs to
+ * @param eh        Header of the extent node to update */
+static void
+ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref,
+			   struct ext4_extent_header *eh)
+{
+	if (!ext4_sb_feature_ro_com(&inode_ref->fs->sb,
+				    EXT4_FRO_COM_METADATA_CSUM))
+		return;
+	if (ext4_extent_header_get_depth(eh) >=
+	    ext4_extent_tree_depth(inode_ref))
+		return;
+	ext4_extent_get_csum_tail(eh)->checksum =
+	    to_le32(ext4_extent_block_csum(inode_ref, eh));
+}

+#ifdef CONFIG_EXTENT_DEBUG_VERBOSE
+/**@brief Dump every node of a path to the debug log, root first
+ * @param inode_ref I-node the extent tree belongs to
+ * @param path      Path in the extent tree */
+static void
+ext4_extent_print_path(struct ext4_inode_ref *inode_ref,
+		       struct ext4_extent_path *path)
+{
+	struct ext4_extent_path *node;
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "Path address: %p\n", path);
+	/* path[0] is the leaf, so start at the root slot and walk down */
+	for (node = path + ext4_extent_tree_depth(inode_ref);
+	     node >= path; node--) {
+		u16int slot;
+		u16int used =
+		    ext4_extent_header_get_nentries(node->header);
+		u16int cap =
+		    ext4_extent_header_get_max_nentries(node->header);
+		ext4_dbg(DEBUG_EXTENT,
+DBG_INFO "-- Block: %llud, Depth: %uhd, Entries: %uhd, Limit: %uhd\n",
+			 node->block.lb_id, node->depth, used, cap);
+		for (slot = 0; slot < used; slot++) {
+			if (!node->depth) {
+				/* Leaf level: entries are extents */
+				struct ext4_extent *extent;
+				extent = EXT4_EXTENT_FIRST(node->header) + slot;
+				ext4_dbg(DEBUG_EXTENT,
+DBG_INFO "Extent: iblock: %ud, fsblock: %llud, count: %uhd\n",
+					 ext4_extent_get_iblock(extent),
+					 ext4_extent_get_fblock(extent),
+					 ext4_extent_get_nblocks(extent));
+				continue;
+			}
+			/* Interior level: entries are indexes */
+			struct ext4_extent_index *index;
+			index = EXT4_EXTENT_FIRST_INDEX(node->header) + slot;
+			ext4_dbg(DEBUG_EXTENT,
+DBG_INFO "Index: iblock: %ud, fsblock: %llud\n",
+				 ext4_extent_index_get_iblock(index),
+				 ext4_extent_index_get_fblock(index));
+		}
+	}
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "====================\n");
+}
+#else /* CONFIG_EXTENT_DEBUG_VERBOSE */
+/* Expands to nothing when verbose extent debugging is not compiled in */
+#define ext4_extent_print_path(...)
+#endif /* CONFIG_EXTENT_DEBUG_VERBOSE */

+/**@brief Binary search in extent index node.
+ * The search runs over entries 1..nentries-1 and yields the entry just
+ * before the first one whose starting block is greater than iblock, so
+ * the first index is returned when no later entry qualifies.
+ * @param header Extent header of index node
+ * @param index  Output value - found index will be set here
+ * @param iblock Logical block number to find in index node */
+static void ext4_extent_binsearch_idx(struct ext4_extent_header *header,
+				      struct ext4_extent_index **index,
+				      ext4_lblk_t iblock)
+{
+	struct ext4_extent_index *lo;
+	struct ext4_extent_index *hi;
+	struct ext4_extent_index *mid;
+	u16int count = ext4_extent_header_get_nentries(header);
+	/* Entry 0 is the fallback answer, so the window starts at entry 1 */
+	lo = EXT4_EXTENT_FIRST_INDEX(header) + 1;
+	hi = EXT4_EXTENT_FIRST_INDEX(header) + count - 1;
+	while (lo <= hi) {
+		mid = lo + (hi - lo) / 2;
+		if (iblock < ext4_extent_index_get_iblock(mid))
+			hi = mid - 1;
+		else
+			lo = mid + 1;
+	}
+	*index = lo - 1;
+}

+/**@brief Binary search in extent leaf node.
+ * The search runs over entries 1..nentries-1 and yields the entry just
+ * before the first one whose starting block is greater than iblock, so
+ * the first extent is returned when no later entry qualifies.
+ * @param header Extent header of leaf node
+ * @param extent Output value - found extent will be set here,
+ *               or nil if node is empty
+ * @param iblock Logical block number to find in leaf node */
+static void ext4_extent_binsearch(struct ext4_extent_header *header,
+				  struct ext4_extent **extent,
+				  ext4_lblk_t iblock)
+{
+	struct ext4_extent *lo;
+	struct ext4_extent *hi;
+	struct ext4_extent *mid;
+	u16int count = ext4_extent_header_get_nentries(header);
+	if (count == 0) {
+		/* Nothing to point at in an empty leaf */
+		*extent = nil;
+		return;
+	}
+	/* Entry 0 is the fallback answer, so the window starts at entry 1 */
+	lo = EXT4_EXTENT_FIRST(header) + 1;
+	hi = EXT4_EXTENT_FIRST(header) + count - 1;
+	while (lo <= hi) {
+		mid = lo + (hi - lo) / 2;
+		if (iblock < ext4_extent_get_iblock(mid))
+			hi = mid - 1;
+		else
+			lo = mid + 1;
+	}
+	*extent = lo - 1;
+}

+/**@brief Mark the node at one level of a path as modified
+ * @param inode_ref I-node the extent tree belongs to
+ * @param path      Path in the extent tree
+ * @param depth     Level of the modified node; path[depth] is used */
+static void
+ext4_extent_path_dirty(struct ext4_inode_ref *inode_ref,
+		       struct ext4_extent_path *path,
+		       u16int depth)
+{
+	struct ext4_extent_path *node;
+	if (ext4_extent_tree_depth(inode_ref) == depth) {
+		/* The root node is part of the i-node itself */
+		inode_ref->dirty = true;
+		return;
+	}
+	/* Any other node sits in its own block: refresh its checksum
+	 * and flag the buffer */
+	node = path + depth;
+	ext4_extent_block_csum_set(inode_ref, node->header);
+	ext4_trans_set_block_dirty(node->block.buf);
+}

+/**@brief Give back the block buffers held by a path
+ * Levels 0 .. tree depth - 1 are visited; a level whose lb_id is zero
+ * is skipped. The walk stops at the first failing release.
+ * @param inode_ref I-node the extent tree belongs to
+ * @param path      Path in the extent tree
+ * @return 0 on success, otherwise the result of the failing
+ *         ext4_block_set() */
+static int
+ext4_extent_path_release(struct ext4_inode_ref *inode_ref,
+			 struct ext4_extent_path *path)
+{
+	int ret = 0;
+	u16int lvl;
+	u16int nlevels = ext4_extent_tree_depth(inode_ref);
+	for (lvl = 0; lvl < nlevels && ret == 0; lvl++) {
+		if (!path[lvl].block.lb_id)
+			continue;
+		ret = ext4_block_set(inode_ref->fs->bdev,
+				     &path[lvl].block);
+	}
+	return ret;
+}

+/**@brief Physical block allocation hint for extent tree manipulation
+ * routines
+ * @param inode_ref I-node
+ * @return First block of the block group that holds the i-node */
+static ext4_fsblk_t
+ext4_extent_tree_alloc_goal(struct ext4_inode_ref *inode_ref)
+{
+	u32int bgid;
+	u32int ino;
+	struct ext4_sblock *sb;
+	sb = &inode_ref->fs->sb;
+	ino = inode_ref->index;
+	/* ext4 i-node numbers start at 1, so i-node N belongs to group
+	 * (N - 1) / inodes_per_group. Without the adjustment the last
+	 * i-node of every group is attributed to the following group,
+	 * and the last i-node of the filesystem to a group that does
+	 * not exist.
+	 * NOTE(review): assumes inode_ref->index holds the 1-based i-node
+	 * number - confirm against ext4_fs_get_inode_ref(). */
+	if (ino != 0)
+		ino--;
+	bgid = ino / ext4_get32(sb, inodes_per_group);
+	/* Currently for allocations from extent tree manipulation routines,
+	 * we try the blocks in the block group the inode table block refers
+	 * to */
+	return ext4_fs_first_bg_block_no(sb, bgid);
+}

+/**@brief Physical block allocation hint for data blocks routines
+ * The hint is derived from the extent the path's leaf entry points at,
+ * shifted by the distance between that extent's first logical block
+ * and iblock, so that the file tends to stay contiguous.
+ * @param inode_ref I-node
+ * @param path      path in the extent tree
+ * @param iblock    the starting logical block of the
+ * mapping to be inserted
+ * @return Physical block allocation hint */
+static ext4_fsblk_t
+ext4_extent_data_alloc_goal(struct ext4_inode_ref *inode_ref,
+			    struct ext4_extent_path *path,
+			    ext4_lblk_t iblock)
+{
+	struct ext4_extent *near = path[0].extent;
+	ext4_lblk_t near_iblock;
+	ext4_lblk_t back;
+	ext4_fsblk_t near_fblock;
+	/* No mapping to lean on: offset the tree allocation hint by
+	 * the logical block number */
+	if (!near)
+		return ext4_extent_tree_alloc_goal(inode_ref) + iblock;
+	near_iblock = ext4_extent_get_iblock(near);
+	near_fblock = ext4_extent_get_fblock(near);
+	/* Target lies after the start of the extent: move forward */
+	if (near_iblock < iblock)
+		return near_fblock + iblock - near_iblock;
+	/* Target lies at or before the start of the extent: move back,
+	 * but never below physical block zero */
+	back = near_iblock - iblock;
+	if (back > near_fblock)
+		return near_fblock;
+	return near_fblock - back;
+}

+/**@brief Verify the extent node block is valid
+ * Checks, in order: header magic, header depth, a non-zero entry
+ * count, the maximum entry count implied by the block size, and the
+ * block checksum. Each failure is reported through ext4_dbg().
+ * @param inode_ref I-node
+ * @param block     block buffer of the extent node block
+ * @param depth     the depth of extent node wanted
+ * @return true if the block passes verification, otherwise false
+ */
+static bool ext4_extent_block_verify(struct ext4_inode_ref *inode_ref,
+				     struct ext4_block *block,
+				     u16int depth)
+{
+	struct ext4_extent_header *eh =
+	    (struct ext4_extent_header *)block->data;
+	u32int blocksz = ext4_sb_get_block_size(&inode_ref->fs->sb);
+	u16int got_depth;
+	u16int got_max;
+	u16int want_max;
+	/* 1. The header must carry the extent magic */
+	if (ext4_extent_header_get_magic(eh) != EXT4_EXTENT_MAGIC) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block header mismatch! Block number: %llud\n",
+			 block->lb_id);
+		return false;
+	}
+	/* 2. The node must sit at the level the caller descended to */
+	got_depth = ext4_extent_header_get_depth(eh);
+	if (got_depth != depth) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block depth mismatch! Expected: %uhd, Got: %uhd. Block number: %llud\n",
+			 depth, got_depth,
+			 block->lb_id);
+		return false;
+	}
+	/* 3. A node stored in its own block may not be empty */
+	if (ext4_extent_header_get_nentries(eh) == 0) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block does not contain any entries! Block number: %llud\n",
+			 block->lb_id);
+		return false;
+	}
+	/* 4. The capacity field must equal what fits after the header */
+	want_max = (blocksz - sizeof(struct ext4_extent_header)) /
+	    sizeof(struct ext4_extent);
+	got_max = ext4_extent_header_get_max_nentries(eh);
+	if (got_max != want_max) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Incorrect extent node block maximum entries field! Expected: %uhd, Got: %uhd. Block number: %llud\n",
+			 want_max,
+			 got_max,
+			 block->lb_id);
+		return false;
+	}
+	/* 5. Finally the checksum in the tail must match */
+	if (!ext4_extent_verify_block_csum(inode_ref, block)) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block checksum failed! Block number: %llud\n",
+			 block->lb_id);
+		return false;
+	}
+	return true;
+}

+/**@brief Find extent for specified iblock.
+ * This function is used for finding block in the extent tree with
+ * saving the path through the tree for possible future modifications.
+ * The path array is allocated here with room for one extra level;
+ * slot 0 describes the leaf and slot `tree depth` describes the root
+ * kept in the i-node.
+ * @param inode_ref I-node to read extent tree from
+ * @param iblock    Iblock to find extent for
+ * @param ppath  Output value - loaded path from extent tree; only
+ *               written on success
+ * @return 0 on success; -1 with the error string set to Enomem or Eio,
+ *         or the result of a failing ext4_trans_block_get() */
+static int ext4_extent_find_extent(struct ext4_inode_ref *inode_ref,
+				   ext4_lblk_t iblock,
+				   struct ext4_extent_path **ppath)
+{
+	struct ext4_extent_header *eh;
+	int ret;
+	u16int depth;
+	u16int k;
+	struct ext4_extent_path *tpath;
+	depth = ext4_extent_tree_depth(inode_ref);
+	eh = ext4_inode_get_extent_header(inode_ref->inode);
+	/* Added 2 for possible tree growing (1 extra depth) */
+	tpath = ext4_malloc(sizeof(struct ext4_extent_path) * (depth + 2));
+	if (tpath == nil) {
+		werrstr(Enomem);
+		return -1;
+	}
+	/* Zero the path array because we need to make sure that
+	 * lb_id field of block buffer is zero */
+	memset(tpath, 0, sizeof(struct ext4_extent_path) * (depth + 2));
+	/* Initialize structure for algorithm start */
+	k = depth;
+	tpath[k].block = inode_ref->block;
+	tpath[k].header = eh;
+	/* Walk through the extent tree */
+	while ((depth = ext4_extent_header_get_depth(eh)) != 0) {
+		/* Search index in index node by iblock */
+		ext4_extent_binsearch_idx(tpath[k].header,
+					  &tpath[k].index, iblock);
+		tpath[k].depth = depth;
+		tpath[k].extent = nil;
+		assert(tpath[k].index != 0);
+		/* Load information for the next iteration */
+		u64int fblock =
+		    ext4_extent_index_get_fblock(tpath[k].index);
+		struct ext4_block block;
+		ret = ext4_trans_block_get(inode_ref->fs->bdev, &block, fblock);
+		if (ret != 0)
+			goto errout0;
+		if (!ext4_extent_block_verify(inode_ref, &block, depth - 1)) {
+			/* The block has not been stored in tpath yet, so
+			 * ext4_extent_path_release() below would never
+			 * see it: hand it back here. Done before
+			 * werrstr() so that Eio is what gets reported. */
+			ext4_block_set(inode_ref->fs->bdev, &block);
+			werrstr(Eio);
+			ret = -1;
+			goto errout0;
+		}
+		k--;
+		eh = (struct ext4_extent_header *)block.data;
+		tpath[k].block = block;
+		tpath[k].header = eh;
+	}
+	tpath[k].depth = 0;
+	tpath[k].extent = nil;
+	tpath[k].index = nil;
+	/* Find extent in the leaf node */
+	ext4_extent_binsearch(tpath[k].header, &tpath[k].extent,
+			      iblock);
+	*ppath = tpath;
+	return 0;
+errout0:
+	/* Put loaded blocks */
+	ext4_extent_path_release(inode_ref, tpath);
+	/* Destroy temporary data structure */
+	ext4_free(tpath);
+	return ret;
+}

+/**@brief Reload the paths in a cursor starting from the level having invalid
+ * pointer
+ * Every level below `depth` is re-read from the block its parent's
+ * index points at, and its cursor is reset to the first or the last
+ * entry of the freshly loaded node.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree
+ * @param depth     The level to start the reload at, must be non-zero
+ * @param right     Try to load the rightmost children
+ * @return 0 on success, -1 with the error string set to Eio on a
+ * corrupted block, or return values of ext4_block_set() and
+ * ext4_trans_block_get(). */
+int ext4_extent_reload_paths(struct ext4_inode_ref *inode_ref,
+			     struct ext4_extent_path *path,
+			     u16int depth,
+			     bool right)
+{
+	int ret;
+	struct ext4_extent_header *eh;
+	struct ext4_extent_path *parent;
+	/* The caller is expected to start at an index level, never at
+	 * the extent level */
+	assert(depth);
+	parent = path + depth;
+	eh = parent->header;
+	/* XXX: the path becomes invalid at the first place... */
+	if (parent->index > EXT4_EXTENT_LAST_INDEX(eh))
+		parent->index = EXT4_EXTENT_LAST_INDEX(eh);
+	/* Descend one level per round until the leaf has been reloaded */
+	while (parent > path) {
+		struct ext4_extent_path *child = parent - 1;
+		/* Drop the stale buffer of the child, if it still holds one */
+		if (child->block.lb_id) {
+			ret = ext4_block_set(inode_ref->fs->bdev, &child->block);
+			if (ret != 0)
+				return ret;
+		}
+		/* Fetch the block the parent's current index refers to */
+		ret = ext4_trans_block_get(inode_ref->fs->bdev, &child->block,
+					   ext4_extent_index_get_fblock(parent->index));
+		if (ret != 0)
+			return ret;
+		/* Refuse to go on with a node that fails validation */
+		if (!ext4_extent_block_verify(inode_ref,
+					      &child->block, parent->depth - 1)) {
+			werrstr(Eio);
+			return -1;
+		}
+		/* Point the child level at the new node */
+		eh = (struct ext4_extent_header *)child->block.data;
+		child->header = eh;
+		child->depth = ext4_extent_header_get_depth(eh);
+		if (child->depth) {
+			child->index = right ? EXT4_EXTENT_LAST_INDEX(eh)
+					     : EXT4_EXTENT_FIRST_INDEX(eh);
+			child->extent = nil;
+		} else {
+			child->extent = right ? EXT4_EXTENT_LAST(eh)
+					      : EXT4_EXTENT_FIRST(eh);
+			child->index = nil;
+		}
+		parent = child;
+	}
+	return 0;
+}

+/**@brief Seek to the next extent
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree
+ * @param nonextp   Output value - whether the current extent is the
+ * right-most extent already; may be nil
+ * @return 0 on success, otherwise the return value of
+ * ext4_extent_reload_paths(). */
+int ext4_extent_increment(struct ext4_inode_ref *inode_ref,
+			  struct ext4_extent_path *path,
+			  bool *nonextp)
+{
+	int ret = 0;
+	bool nonext = true;
+	u16int slot;
+	u16int lvl;
+	u16int rootdepth = ext4_extent_tree_depth(inode_ref);
+	struct ext4_extent_path *p = path;
+	/* Climb from the leaf until a level is found whose cursor is
+	 * not yet on its last entry */
+	for (lvl = 0; lvl <= rootdepth; lvl++, p++) {
+		if (p->depth)
+			slot = p->index -
+			    EXT4_EXTENT_FIRST_INDEX(p->header);
+		else
+			slot = p->extent -
+			    EXT4_EXTENT_FIRST(p->header);
+		if (slot < ext4_extent_header_get_nentries(p->header) - 1)
+			break;
+	}
+	/* Running past the root means the cursor already sits on the
+	 * last extent; nothing is moved in that case */
+	if (lvl <= rootdepth) {
+		/* Step the cursor of that level one entry to the right */
+		if (p->depth)
+			p->index++;
+		else
+			p->extent++;
+		/* A step above the leaf invalidates everything below it,
+		 * which is reloaded along the leftmost children */
+		if (lvl != 0)
+			ret = ext4_extent_reload_paths(inode_ref, path, lvl,
+						       false);
+		if (ret == 0)
+			nonext = false;
+	}
+	if (nonextp)
+		*nonextp = nonext;
+	return ret;
+}

+/**@brief Seek to the previous extent
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree
+ * @param noprevp   Output value - whether the current extent is the
+ * left-most extent already; may be nil
+ * @return 0 on success, otherwise the return value of
+ * ext4_extent_reload_paths(). */
+int
+ext4_extent_decrement(struct ext4_inode_ref *inode_ref,
+		      struct ext4_extent_path *path,
+		      bool *noprevp)
+{
+	int ret = 0;
+	bool noprev = true;
+	u16int slot;
+	u16int lvl;
+	u16int rootdepth = ext4_extent_tree_depth(inode_ref);
+	struct ext4_extent_path *p = path;
+	/* Climb from the leaf until a level is found whose cursor is
+	 * not on its first entry */
+	for (lvl = 0; lvl <= rootdepth; lvl++, p++) {
+		if (p->depth)
+			slot = p->index -
+			    EXT4_EXTENT_FIRST_INDEX(p->header);
+		else
+			slot = p->extent -
+			    EXT4_EXTENT_FIRST(p->header);
+		if (slot != 0)
+			break;
+	}
+	/* Running past the root means the cursor already sits on the
+	 * first extent; nothing is moved in that case */
+	if (lvl <= rootdepth) {
+		/* Step the cursor of that level one entry to the left */
+		if (p->depth)
+			p->index--;
+		else
+			p->extent--;
+		/* A step above the leaf invalidates everything below it,
+		 * which is reloaded along the rightmost children */
+		if (lvl != 0)
+			ret = ext4_extent_reload_paths(inode_ref, path, lvl,
+						       true);
+		if (ret == 0)
+			noprev = false;
+	}
+	if (noprevp)
+		*noprevp = noprev;
+	return ret;
+}

+/**@brief Update the index of nodes starting from leaf
+ * Walks from the parent of the leaf towards the root. At each level
+ * the starting logical block of the parent's current index is made
+ * equal to the starting logical block of the child's current entry,
+ * as long as that entry is the first one in the child node.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree
+ * @param force     set this to true if insertion, deletion or modification
+ * of starting logical block of the first index in a node is made at non-leaf
+ * level; the walk then continues past levels that already agree */
+static void ext4_extent_update_index(struct ext4_inode_ref *inode_ref,
+				     struct ext4_extent_path *path,
+				     bool force)
+{
+	struct ext4_extent_path *parent;
+	struct ext4_extent_path *top;
+	top = path + ext4_extent_tree_depth(inode_ref);
+	for (parent = path + 1; parent <= top; parent++) {
+		struct ext4_extent_path *child = parent - 1;
+		struct ext4_extent_index *idx;
+		ext4_lblk_t child_start;
+		intptr child_slot;
+		/* Position of the child's cursor inside its node */
+		if (child->depth)
+			child_slot = child->index -
+			    EXT4_EXTENT_FIRST_INDEX(child->header);
+		else
+			child_slot = child->extent -
+			    EXT4_EXTENT_FIRST(child->header);
+		/* Only a change to the first slot can affect the parent */
+		if (child_slot != 0)
+			break;
+		/* A parent deeper than level 1 has index children,
+		 * a level 1 parent has extent children */
+		if (parent->depth > 1)
+			child_start =
+			    ext4_extent_index_get_iblock(child->index);
+		else
+			child_start =
+			    ext4_extent_get_iblock(child->extent);
+		idx = parent->index;
+		if (ext4_extent_index_get_iblock(idx) == child_start) {
+			/* Already in agreement: stop unless forced on */
+			if (!force)
+				break;
+			continue;
+		}
+		/* Propagate the new starting block to the parent index */
+		ext4_extent_index_set_iblock(idx, child_start);
+		ext4_extent_path_dirty(inode_ref, path, parent->depth);
+	}
+}

+/**@brief Make the tree grow up by one level
+ * The contents of the root node kept in the i-node are copied into the
+ * new block, and the in-inode root is turned into an index node with a
+ * single entry pointing at that block. The path gains one level: the
+ * old root slot now describes the new block and the slot above it
+ * describes the in-inode root.
+ * @param inode_ref  I-node the extent tree resides in
+ * @param path       Path in the extent tree
+ * @param newfblock  The newly allocated block for tree growth
+ * @return 0 on success, otherwise the result of the failing
+ *         ext4_trans_block_get_noread() */
+static int ext4_extent_grow_tree(struct ext4_inode_ref *inode_ref,
+				 struct ext4_extent_path *path,
+				 ext4_fsblk_t newfblock)
+{
+	int rc;
+	u16int slot;
+	u16int old_depth;
+	u16int capacity;
+	u32int blocksz;
+	ext4_lblk_t first_iblock;
+	struct ext4_block newblk;
+	struct ext4_block inode_blk;
+	struct ext4_extent_header *inode_eh;
+	struct ext4_extent_header *moved_eh;
+	struct ext4_extent_path *oldroot;
+	struct ext4_extent_path *newroot;
+	old_depth = ext4_extent_tree_depth(inode_ref);
+	oldroot = path + old_depth;
+	newroot = oldroot + 1;
+	/* Remember the in-inode root before its path slot is repointed */
+	inode_blk = oldroot->block;
+	inode_eh = oldroot->header;
+	blocksz = ext4_sb_get_block_size(&inode_ref->fs->sb);
+	/* Keep the cursor as a slot number so that it can be rebuilt
+	 * against the copy of the node */
+	if (old_depth)
+		slot = oldroot->index -
+		    EXT4_EXTENT_FIRST_INDEX(inode_eh);
+	else
+		slot = oldroot->extent -
+		    EXT4_EXTENT_FIRST(inode_eh);
+	/* Get a buffer for the new block without reading it from disk */
+	rc = ext4_trans_block_get_noread(inode_ref->fs->bdev, &newblk, newfblock);
+	if (rc != 0)
+		return rc;
+	/* Start from a clean block, then copy the old root into it */
+	memset(newblk.data, 0, blocksz);
+	memcpy(newblk.data, inode_ref->inode->blocks,
+	       EXT4_INODE_BLOCKS * sizeof(u32int));
+	moved_eh = (struct ext4_extent_header *)newblk.data;
+	/* Repoint the old root slot of the path at the copy */
+	oldroot->block = newblk;
+	oldroot->header = moved_eh;
+	if (oldroot->depth) {
+		oldroot->index = EXT4_EXTENT_FIRST_INDEX(moved_eh) + slot;
+		capacity =
+		    (blocksz - sizeof(struct ext4_extent_header)) /
+		    sizeof(struct ext4_extent_index);
+		oldroot->extent = nil;
+		first_iblock =
+		    ext4_extent_index_get_iblock(EXT4_EXTENT_FIRST_INDEX(moved_eh));
+	} else {
+		oldroot->extent = EXT4_EXTENT_FIRST(moved_eh) + slot;
+		capacity =
+		    (blocksz - sizeof(struct ext4_extent_header)) /
+		    sizeof(struct ext4_extent);
+		oldroot->index = nil;
+		first_iblock =
+		    ext4_extent_get_iblock(EXT4_EXTENT_FIRST(moved_eh));
+	}
+	/* Describe the in-inode node as the new, one level deeper root */
+	newroot->depth = old_depth + 1;
+	newroot->block = inode_blk;
+	newroot->header = inode_eh;
+	newroot->extent = nil;
+	newroot->index = EXT4_EXTENT_FIRST_INDEX(inode_eh);
+	ext4_extent_header_set_depth(inode_eh, newroot->depth);
+	/* Its only entry points at the block that received the copy */
+	ext4_extent_header_set_nentries(inode_eh, 1);
+	ext4_extent_index_set_iblock(newroot->index, first_iblock);
+	ext4_extent_index_set_fblock(newroot->index, newfblock);
+	/* Since new_root belongs to on-disk inode,
+	 * we don't do checksum here */
+	inode_ref->dirty = true;
+	/* The copy now occupies a whole block, so its capacity grows;
+	 * this has to be set before the node is marked dirty */
+	ext4_extent_header_set_max_nentries(moved_eh, capacity);
+	ext4_extent_path_dirty(inode_ref, path, oldroot->depth);
+	return 0;
+}

+/**@brief Do splitting on the tree if the leaf is full
+ * Counts, from the leaf upwards, how many consecutive levels have no
+ * room left, allocates one block per such level and splits each of
+ * them in half. When every level including the root is full, the
+ * tree is first grown by one level with the last allocated block.
+ * The path is updated to keep following the current entries.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree for possible splitting
+ * @param nslots    number of entries that will be inserted to the
+ * leaf in future.
+ * @return 0 on success (including when nothing had to be split),
+ *         -1 with the error string set to Enomem, or the result of the
+ *         failing allocation / block operation. On failure every block
+ *         allocated by this call is freed again. */
+static int ext4_extent_split(struct ext4_inode_ref *inode_ref,
+			     struct ext4_extent_path *path,
+			     u16int nslots)
+{
+	int ret;
+	u16int i;
+	ext4_fsblk_t goal;
+	u16int rootdepth;
+	struct ext4_extent_path *p;
+	u32int blocksz;
+	/* Number of new blocks to be allocated */
+	u16int nnewfblocks = 0;
+	/* Number of node to be split */
+	u16int nsplits = 0;
+	/* Array of new blocks allocated */
+	ext4_fsblk_t *newfblocks;
+	/* The index of the right block inserted last time */
+	ext4_lblk_t lastiblock = 0;
+	/* Whether we updated child path to point to the right block
+	 * at the previous round during splitting */
+	bool prevrblock = false;
+	blocksz = ext4_sb_get_block_size(&inode_ref->fs->sb);
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+	goal = ext4_extent_tree_alloc_goal(inode_ref);
+	/* First calculate how many levels will be touched */
+	for (p = path; p <= path + rootdepth; p++) {
+		u16int entries =
+		    ext4_extent_header_get_nentries(p->header);
+		u16int limit =
+		    ext4_extent_header_get_max_nentries(p->header);
+		assert(entries <= limit);
+		if (!p->depth) {
+			if (entries + nslots <= limit)
+				break;
+		} else {
+			if (entries < limit)
+				break;
+		}
+		/* We have to split a node when the tree is full */
+		nnewfblocks++;
+		nsplits++;
+	}
+	if (!nnewfblocks)
+		return 0;
+	/* Allocate the array for storing newly allocated blocks */
+	newfblocks = ext4_malloc(sizeof(ext4_fsblk_t) * nnewfblocks);
+	if (!newfblocks) {
+		werrstr(Enomem);
+		return -1;
+	}
+	for (i = 0; i < nnewfblocks; i++) {
+		ret = ext4_balloc_alloc_block(inode_ref, goal, newfblocks + i);
+		if (ret != 0) {
+			/* Only the first i blocks were obtained: shrink
+			 * the count so that the cleanup below frees
+			 * exactly those, together with the array */
+			nnewfblocks = i;
+			goto finish;
+		}
+	}
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "nnewfblocks: %uhd rootdepth: %uhd\n",
+		 nnewfblocks, rootdepth);
+	/* If number of blocks to be allocated is greater than
+	 * the depth of root we have to grow the tree */
+	if (nnewfblocks == rootdepth + 1) {
+		ext4_dbg(DEBUG_EXTENT, "Growing: \n");
+		/* The last block is consumed by the growth, not by a split */
+		nsplits--;
+		ret = ext4_extent_grow_tree(inode_ref,
+					    path, newfblocks[rootdepth]);
+		if (ret != 0)
+			goto finish;
+		ext4_extent_print_path(inode_ref, path);
+		/* If we are moving the in-inode leaf to on-block leaf.
+		 * we do not need further actions. */
+		if (!rootdepth)
+			goto finish;
+		++rootdepth; USED(rootdepth);
+	}
+	/* Start splitting */
+	p = path;
+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "Start splitting: \n");
+	for (i = 0; i < nsplits; i++, p++) {
+		struct ext4_extent_header *header;
+		u16int entries =
+		    ext4_extent_header_get_nentries(p->header);
+		u16int limit =
+		    ext4_extent_header_get_max_nentries(p->header);
+		/* The entry we start shifting to the right block */
+		u16int split_ptr = entries / 2;
+		/* The number of entry the right block will have */
+		u16int right_entries = entries - split_ptr;
+		/* The current entry */
+		u16int curr_ptr;
+		ext4_lblk_t riblock;
+		struct ext4_block block;
+		ret = ext4_trans_block_get_noread(inode_ref->fs->bdev,
+						  &block, newfblocks[i]);
+		if (ret != 0)
+			goto finish;
+		/* Initialize newly allocated block and remember it */
+		memset(block.data, 0, blocksz);
+		header = (void *)block.data;
+		/* Initialize on-disk structure (header) */
+		ext4_extent_header_set_nentries(header,
+				right_entries);
+		ext4_extent_header_set_max_nentries(header, limit);
+		ext4_extent_header_set_magic(header, EXT4_EXTENT_MAGIC);
+		ext4_extent_header_set_depth(header, p->depth);
+		ext4_extent_header_set_generation(header, 0);
+		/* Move some entries from old block to new block */
+		if (p->depth) {
+			struct ext4_extent_index *left_index =
+				EXT4_EXTENT_FIRST_INDEX(p->header);
+			struct ext4_extent_index *split_index =
+				left_index + split_ptr;
+			riblock = ext4_extent_index_get_iblock(split_index);
+			ext4_dbg(DEBUG_EXTENT,
+				 DBG_INFO "depth: %ud, riblock: %ud\n",
+				 p->depth, riblock);
+			curr_ptr = p->index - left_index;
+			memcpy(EXT4_EXTENT_FIRST_INDEX(header),
+			       split_index,
+			       right_entries * EXT4_EXTENT_INDEX_SIZE);
+			memset(split_index, 0,
+			       right_entries * EXT4_EXTENT_INDEX_SIZE);
+		} else {
+			struct ext4_extent *left_extent =
+				EXT4_EXTENT_FIRST(p->header);
+			struct ext4_extent *split_extent =
+				left_extent + split_ptr;
+			riblock = ext4_extent_get_iblock(split_extent);
+			ext4_dbg(DEBUG_EXTENT,
+				 DBG_INFO "depth: %ud, riblock: %ud\n",
+				 p->depth, riblock);
+			curr_ptr = p->extent - left_extent;
+			memcpy(EXT4_EXTENT_FIRST(header),
+			       split_extent,
+			       right_entries * EXT4_EXTENT_SIZE);
+			memset(split_extent, 0,
+			       right_entries * EXT4_EXTENT_SIZE);
+		}
+		/* Set entries count in left node */
+		ext4_extent_header_set_nentries(p->header,
+						entries - right_entries);
+		/* Decide whether we need to update the path to
+		 * point to right block or not */
+		if (curr_ptr >= split_ptr) {
+			/* Update the checksum for the left block */
+			ext4_extent_path_dirty(inode_ref, path, p->depth);
+			/* Put back the left block */
+			ret = ext4_block_set(inode_ref->fs->bdev,
+					     &p->block);
+			if (ret != 0)
+				goto finish;
+			/* Update pointers in extent path structure to
+			 * point to right block */
+			p->block = block;
+			p->header = (void *)block.data;
+			if (p->depth) {
+				p->index =
+				    EXT4_EXTENT_FIRST_INDEX(p->header) +
+				    curr_ptr - split_ptr;
+			} else {
+				p->extent =
+				    EXT4_EXTENT_FIRST(p->header) +
+				    curr_ptr - split_ptr;
+			}
+		} else {
+			/* Update the checksum for the right block */
+			ext4_extent_block_csum_set(inode_ref, header);
+			ext4_trans_set_block_dirty(block.buf);
+			/* Put back the right block */
+			ret = ext4_block_set(inode_ref->fs->bdev,
+					     &block);
+			if (ret != 0)
+				goto finish;
+		}
+		/* Append an index after the current index. This only
+		 * happens from the second round on (the first round works
+		 * on the leaf), so newfblocks[i - 1] is the right block
+		 * created by the previous round. */
+		if (p->depth) {
+			struct ext4_extent_index *index = p->index + 1;
+			/* If we updated the path to right block in the previous
+			 * round, we update the pointer in the path to point to
+			 * the right block */
+			if (prevrblock)
+				p->index++;
+			if (index <= EXT4_EXTENT_LAST_INDEX(p->header)) {
+				u16int nindex =
+					EXT4_EXTENT_LAST_INDEX(p->header) -
+					index + 1;
+				memmove(index + 1,
+					index,
+					nindex * EXT4_EXTENT_INDEX_SIZE);
+			}
+			memset(index, 0, EXT4_EXTENT_INDEX_SIZE);
+			ext4_extent_index_set_iblock(index, lastiblock);
+			ext4_extent_index_set_fblock(index, newfblocks[i - 1]);
+			entries = ext4_extent_header_get_nentries(p->header);
+			ext4_extent_header_set_nentries(p->header,
+					entries + 1);
+		}
+		ext4_extent_path_dirty(inode_ref, path, p->depth);
+		/* We may have updated the path to right block in this round */
+		prevrblock = curr_ptr >= split_ptr;
+		/* We also update the lastiblock variable to the index of the
+		 * right block */
+		lastiblock = riblock;
+	}
+	/* Append an index after the current index: the first level that
+	 * was not split receives the entry for the last right block */
+	if (p->depth) {
+		struct ext4_extent_index *index = p->index + 1;
+		u16int entries =
+		    ext4_extent_header_get_nentries(p->header);
+		/* If we updated the path to right block in the previous
+		 * round, we update the pointer in the path to point to
+		 * the right block */
+		if (prevrblock)
+			p->index++;
+		if (index <= EXT4_EXTENT_LAST_INDEX(p->header)) {
+			u16int nindex =
+				EXT4_EXTENT_LAST_INDEX(p->header) -
+				index + 1;
+			memmove(index + 1,
+				index,
+				nindex * EXT4_EXTENT_INDEX_SIZE);
+		}
+		memset(index, 0, EXT4_EXTENT_INDEX_SIZE);
+		ext4_extent_index_set_iblock(index, lastiblock);
+		ext4_extent_index_set_fblock(index, newfblocks[i - 1]);
+		ext4_extent_header_set_nentries(p->header,
+				entries + 1);
+		ext4_extent_path_dirty(inode_ref, path, p->depth);
+	}
+	ret = 0;
+finish:
+	/* On failure give back every block obtained above */
+	if (ret != 0)
+		for (i = 0; i < nnewfblocks; i++)
+			ext4_balloc_free_block(inode_ref, newfblocks[i]);
+	ext4_free(newfblocks);
+	return ret;
+}

+/**@brief Insert an extent into a leaf of the extent tree.

+ *

+ * The tree is split (and grown) first so the leaf on the path has room

+ * for one more entry. The new extent is then stored right after the

+ * current extent when its key is greater, or at the current position

+ * otherwise, and the keys kept in the parent nodes are refreshed.

+ * @param inode_ref I-node the extent tree resides in

+ * @param path      Path in the extent tree for possible splitting

+ * @param ext       Extent to be inserted (copied into the leaf)

+ * @return 0 on success, otherwise the error of ext4_extent_split() */

+static int ext4_extent_insert(struct ext4_inode_ref *inode_ref,

+			      struct ext4_extent_path *path,

+			      struct ext4_extent *ext)

+{

+	int err;

+	u16int count;

+	struct ext4_extent *last;

+	struct ext4_extent_path *leaf = path;

+	/* Make room for one more entry in the leaf */

+	err = ext4_extent_split(inode_ref, path, 1);

+	if (err != 0)

+		return err;

+	count = ext4_extent_header_get_nentries(leaf->header);

+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "After splitting: \n");

+	ext4_extent_print_path(inode_ref, path);

+	/* Choose the slot that will receive the new extent */

+	if (leaf->extent) {

+		if (ext4_extent_get_iblock(ext) >

+		    ext4_extent_get_iblock(leaf->extent))

+			leaf->extent++;

+	} else {

+		/* No current extent: use the first slot of the leaf */

+		leaf->extent = EXT4_EXTENT_FIRST(leaf->header);

+	}

+	/* Move the entries from the slot onwards one position right */

+	last = EXT4_EXTENT_LAST(leaf->header);

+	if (leaf->extent <= last) {

+		u16int nshift = last - leaf->extent + 1;

+		ext4_dbg(DEBUG_EXTENT,

+			 DBG_INFO "%uhd extents to be shifted at leaf\n",

+			 nshift);

+		memmove(leaf->extent + 1, leaf->extent,

+			nshift * EXT4_EXTENT_SIZE);

+	}

+	memcpy(leaf->extent, ext, EXT4_EXTENT_SIZE);

+	ext4_extent_header_set_nentries(leaf->header, count + 1);

+	ext4_extent_path_dirty(inode_ref, path, leaf->depth);

+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "Before updating indice: \n");

+	ext4_extent_print_path(inode_ref, path);

+	/* Update the index of the first entry in parents node */

+	ext4_extent_update_index(inode_ref, path, false);

+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "At the end: \n");

+	ext4_extent_print_path(inode_ref, path);

+	return 0;

+}

+/**@brief Delete the item the path points to in the node at @depth.

+ *

+ * The entries following the deleted one are moved one slot to the left,

+ * the vacated last slot is cleared, the entry count in the node header

+ * is decremented and the node is marked dirty. The node must hold at

+ * least one entry (asserted).

+ * @param inode_ref I-node the extent tree resides in

+ * @param path      Path in the extent tree

+ * @param depth     The level of the node to be operated on */

+static void

+ext4_extent_delete_item(struct ext4_inode_ref *inode_ref,

+			struct ext4_extent_path *path,

+			u16int depth)

+{

+	u16int nitems;

+	struct ext4_extent_header *hdr;

+	struct ext4_extent_path *p;

+	p = path + depth;

+	hdr = p->header;

+	assert(ext4_extent_header_get_nentries(hdr));

+	if (p->depth) {

+		/* Index node: remove the index the path points to */

+		struct ext4_extent_index *idx;

+		idx = p->index;

+		/* Number of indices located after the deleted one */

+		nitems = EXT4_EXTENT_LAST_INDEX(hdr) - (idx + 1) + 1;

+		if (nitems) {

+			memmove(idx, idx + 1,

+				nitems * EXT4_EXTENT_INDEX_SIZE);

+			/* Clear the now duplicated last index; use the

+			 * index accessor so the pointer type matches the

+			 * node type */

+			memset(EXT4_EXTENT_LAST_INDEX(hdr), 0,

+			       EXT4_EXTENT_INDEX_SIZE);

+		} else {

+			memset(idx, 0, EXT4_EXTENT_INDEX_SIZE);

+		}

+	} else {

+		/* Leaf node: remove the extent the path points to */

+		struct ext4_extent *ext;

+		ext = p->extent;

+		/* Number of extents located after the deleted one */

+		nitems = EXT4_EXTENT_LAST(hdr) - (ext + 1) + 1;

+		if (nitems) {

+			memmove(ext, ext + 1,

+				nitems * EXT4_EXTENT_SIZE);

+			memset(EXT4_EXTENT_LAST(hdr), 0,

+			       EXT4_EXTENT_SIZE);

+		} else {

+			memset(ext, 0, EXT4_EXTENT_SIZE);

+		}

+	}

+	/* The last-entry macros above are evaluated before the count is

+	 * decremented, so they still address the old last slot */

+	nitems = ext4_extent_header_get_nentries(hdr) - 1;

+	ext4_extent_header_set_nentries(hdr,

+					nitems);

+	ext4_extent_path_dirty(inode_ref, path, p->depth);

+}

+/**@brief Remove extents from the leaf on the path, beginning at the

+ * current extent, as long as their keys do not exceed @toiblock.

+ *

+ * An extent lying entirely at or below @toiblock is deleted and its

+ * blocks are freed. An extent that only starts at or below @toiblock

+ * is shortened from the front instead.

+ * @param inode_ref I-node the tree resides in

+ * @param path      Path in the extent tree

+ * @param toiblock  The last logical block to be removed

+ * @param stopp     Output value telling the caller to stop deletion.

+ * Set to true when an extent reaching beyond @toiblock is met (it is

+ * either left untouched or trimmed), false when the leaf ran out of

+ * extents first.

+ * @return 0; results of the block freeing routine are not checked. */

+static int

+ext4_extent_delete_leaf(struct ext4_inode_ref *inode_ref,

+			struct ext4_extent_path *path,

+			ext4_lblk_t toiblock,

+			bool *stopp)

+{

+	u16int pos;

+	u16int left;

+	struct ext4_extent_path *leaf = path;

+	*stopp = false;

+	do {

+		bool unwritten;

+		u16int len;

+		u16int flen;

+		ext4_lblk_t first;

+		ext4_lblk_t last;

+		ext4_fsblk_t fblock;

+		struct ext4_extent *cur;

+		struct ext4_extent_header *hdr;

+		hdr = leaf->header;

+		left = ext4_extent_header_get_nentries(hdr);

+		pos = leaf->extent - EXT4_EXTENT_FIRST(hdr);

+		assert(left > 0);

+		/* Snapshot the current extent */

+		cur = leaf->extent;

+		fblock = ext4_extent_get_fblock(cur);

+		first = ext4_extent_get_iblock(cur);

+		last = first + ext4_extent_get_nblocks(cur) - 1;

+		len = last - first + 1;

+		unwritten = EXT4_EXT_IS_UNWRITTEN(cur);

+		if (toiblock < first) {

+			/* The extent starts past the range: nothing

+			 * left to remove */

+			*stopp = true;

+			break;

+		}

+		if (toiblock < last) {

+			/* The range ends inside the extent: free its

+			 * head and keep the tail */

+			flen = toiblock - first + 1;

+			ext4_dbg(DEBUG_EXTENT,

+				 DBG_INFO "Freeing: %llud:%uhd\n",

+				 fblock, flen);

+			ext4_balloc_free_blocks(inode_ref, fblock, flen);

+			/* Re-base the extent on the first block kept */

+			fblock += flen;

+			first = toiblock + 1;

+			len = last - first + 1;

+			ext4_extent_set_iblock(cur, first);

+			ext4_extent_set_nblocks(cur, len, unwritten);

+			ext4_extent_set_fblock(cur, fblock);

+			ext4_extent_path_dirty(inode_ref, path, leaf->depth);

+			*stopp = true;

+			break;

+		}

+		/* The whole extent is inside the range: drop it from

+		 * the leaf, then release its blocks */

+		ext4_extent_delete_item(inode_ref, path, 0);

+		left--;

+		flen = len;

+		ext4_dbg(DEBUG_EXTENT,

+			 DBG_INFO "Freeing: %llud:%uhd\n",

+			 fblock, flen);

+		ext4_balloc_free_blocks(inode_ref, fblock, flen);

+		/* Go on while an extent slid into the current slot */

+	} while (pos < left);

+	return 0;

+}

+/**@brief Remove the current index at the given level and free the

+ * block that index refers to.

+ * @param inode_ref I-node the tree resides in

+ * @param path      Path in the extent tree

+ * @param depth     The level where deletion takes place at

+ * @return 0; the result of the block freeing routine is not checked. */

+static int

+ext4_extent_delete_node(struct ext4_inode_ref *inode_ref,

+			struct ext4_extent_path *path,

+			u16int depth)

+{

+	ext4_fsblk_t child;

+	struct ext4_extent_path *node = path + depth;

+	assert(ext4_extent_header_get_nentries(node->header) > 0);

+	/* Remember which block the index refers to before the index

+	 * is removed from the node */

+	child = ext4_extent_index_get_fblock(node->index);

+	ext4_extent_delete_item(inode_ref, path, depth);

+	/* Release the block the removed index referred to */

+	ext4_dbg(DEBUG_EXTENT,

+		 DBG_INFO "Freeing: %llud:%uhd\n",

+		 child, 1);

+	ext4_balloc_free_blocks(inode_ref, child, 1);

+	return 0;

+}

+/**@brief Delete the mapping in extent tree starting from \p fromiblock to

+ * \p toiblock inclusively.

+ *

+ * The function looks up the extent for \p fromiblock, trims or skips

+ * that extent as needed, then walks leaf by leaf deleting extents (and

+ * index nodes whose child became empty) until an extent beyond

+ * \p toiblock is met or the tree has nothing more to the right.

+ * @param inode_ref  I-node the extent tree resides in

+ * @param fromiblock first logical block to be removed

+ * @param toiblock   last logical block to be removed

+ * @return 0 on success (also when the tree is empty or there is no

+ * extent at or after \p fromiblock), otherwise the return value of

+ * ext4_extent_find_extent(), ext4_extent_increment(),

+ * ext4_extent_insert(), ext4_extent_delete_leaf(),

+ * ext4_extent_delete_node() or ext4_extent_reload_paths().

+ */

+int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref,

+			     ext4_lblk_t fromiblock,

+			     ext4_lblk_t toiblock)

+{

+	int ret;

+	u16int nitems;

+	int rootdepth;

+	struct ext4_extent_header *hdr;

+	struct ext4_extent *ext;

+	ext4_lblk_t endiblock;

+	ext4_lblk_t startiblock;

+	struct ext4_extent_path *path, *p;

+	rootdepth = ext4_extent_tree_depth(inode_ref);

+	ret = ext4_extent_find_extent(inode_ref, fromiblock, &path);

+	if (ret != 0)

+		return ret;

+	p = path;

+	hdr = p->header; USED(hdr);

+	/* We return 0 even if the whole extent tree is empty. */

+	if (!ext4_extent_header_get_nentries(path->header))

+		goto out;

+	/* Calculate the last logical block of the current extent. */

+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "At start of remove_space: \n");

+	ext4_extent_print_path(inode_ref, path);

+	ext = p->extent;

+	startiblock = ext4_extent_get_iblock(ext);

+	endiblock = startiblock + ext4_extent_get_nblocks(ext) - 1;

+	ext4_dbg(DEBUG_EXTENT,

+		 DBG_INFO "Extent: %ud:%uhd\n",

+		 startiblock, endiblock);

+	if (fromiblock > endiblock) {

+		bool nonext;

+		/* The last logical block of the current extent is smaller

+		 * than the first logical block we are going to remove,

+		 * thus we increment the extent pointer of the cursor. */

+		/* Increment the extent pointer to point to the

+		 * next extent. */

+		ret = ext4_extent_increment(inode_ref, path, &nonext);

+		if (ret != 0)

+			goto out;

+		/* The current extent is already the last extent in

+		 * the tree, so we just return success here. */

+		if (nonext)

+			goto out;

+	} else if (fromiblock > startiblock) {

+		bool unwritten;

+		u16int len;

+		/* @fromiblock is in the range of the current extent,

+		 * but does not sit right on the starting block.

+		 *

+		 * In this case we need to modify the current extent.

+		 * and free some blocks, since we do not really want

+		 * to remove and reinsert a new one. */

+		len = fromiblock - startiblock;

+		unwritten = EXT4_EXT_IS_UNWRITTEN(ext);

+		ext4_extent_set_nblocks(ext, len, unwritten);

+		ext4_extent_path_dirty(inode_ref, path, p->depth);

+		/* Free the range of blocks starting from @fromiblock

+		 * up to either @endiblock or @toiblock. */

+		if (toiblock < endiblock) {

+			u16int flen;

+			ext4_fsblk_t blocknr;

+			struct ext4_extent next;

+			/* In case we free up space inside an extent

+			 * while not touching both ends, we need to

+			 * unavoidably insert a new extent right after

+			 * the modified current extent, and that may

+			 * cause tree splitting. */

+			/* Now we need to free up space first. */

+			flen = toiblock - fromiblock + 1;

+			blocknr = ext4_extent_get_fblock(ext) + len;

+			ext4_dbg(DEBUG_EXTENT,

+				 DBG_INFO "Freeing: %llud:%uhd\n",

+				 blocknr, flen);

+			ext4_balloc_free_blocks(inode_ref, blocknr, flen);

+			blocknr += flen;

+			startiblock = fromiblock + flen;

+			len = endiblock - startiblock + 1;

+			ext4_extent_set_iblock(&next, startiblock);

+			ext4_extent_set_nblocks(&next, len, unwritten);

+			ext4_extent_set_fblock(&next, blocknr);

+			ret = ext4_extent_insert(inode_ref, path, &next);

+			/* After we free up the space and insert a new

+			 * extent, we are done. */

+			goto out;

+		} else {

+			bool nonext;

+			u16int flen;

+			ext4_fsblk_t blocknr;

+			/* Otherwise we do not need any insertion,

+			 * which also means that no extra space may be

+			 * allocated for tree splitting. */

+			flen = endiblock - fromiblock + 1;

+			blocknr = ext4_extent_get_fblock(ext) + len;

+			/* Now we need to free up space first. */

+			ext4_dbg(DEBUG_EXTENT,

+				 DBG_INFO "Freeing: %llud:%uhd\n",

+				 blocknr, flen);

+			ext4_balloc_free_blocks(inode_ref, blocknr, flen);

+			/* Increment the extent pointer to point to the

+			 * next extent. */

+			ret = ext4_extent_increment(inode_ref, path, &nonext);

+			if (ret != 0 || nonext)

+				goto out;

+		}

+	}

+	while (p <= path + rootdepth) {

+		struct ext4_extent_path *chldp;

+		hdr = p->header;

+		if (!p->depth) {

+			bool stop;

+			/* Delete as much extents as we can. */

+			ret = ext4_extent_delete_leaf(inode_ref,

+						      path,

+						      toiblock,

+						      &stop);

+			if (ret != 0)

+				goto out;

+			if (stop) {

+				/* Since the current extent has its logical

+				 * block number greater than @toiblock,

+				 * we are done. */

+				break;

+			}

+			/* Since there are no more items in the leaf,

+			 * we have to go one level above to switch to the

+			 * next leaf. */

+			p++;

+			continue;

+		}

+		chldp = p - 1;

+		nitems = ext4_extent_header_get_nentries(chldp->header);

+		/* Now we don't need the children path anymore. */

+		ext4_block_set(inode_ref->fs->bdev, &chldp->block);

+		if (!nitems) {

+			ret = ext4_extent_delete_node(inode_ref, path, p->depth);

+			if (ret != 0)

+				goto out;

+			if (p->index > EXT4_EXTENT_LAST_INDEX(hdr)) {

+				/* Go one level above */

+				p++;

+			} else {

+				ret = ext4_extent_reload_paths(inode_ref, path, p->depth, false);

+				if (ret != 0)

+					goto out;

+				/* Go to the bottom level (aka the leaf). */

+				p = path;

+			}

+		} else {

+			if (p->index == EXT4_EXTENT_LAST_INDEX(hdr)) {

+				/* Go one level above */

+				p++;

+			} else {

+				p->index++;

+				ret = ext4_extent_reload_paths(inode_ref, path, p->depth, false);

+				if (ret != 0)

+					goto out;

+				/* Go to the bottom level (aka the leaf). */

+				p = path;

+			}

+		}

+	}

+	/* The above loop can only exit in one of two situations:

+	 *

+	 * 1. We found that there is no more extents at the right

+	 *    (p > path + rootdepth)

+	 * 2. We found that the next extent has key larger than @toiblock

+	 *    (p at leaf, i.e. p == path) */

+	assert(p == path || p > path + rootdepth);

+	if (p == path) {

+		/* We might have removed the leftmost key in the node,

+		 * so we need to update the first key of the right

+		 * sibling at every level until we meet a non-leftmost

+		 * key. */

+		ext4_extent_update_index(inode_ref, path, true);

+	} else {

+		/* Put loaded blocks. We won't double-release

+		 * in this case since the depth of tree will

+		 * be reset to 0. */

+		ext4_extent_path_release(inode_ref, path);

+		hdr = ext4_inode_get_extent_header(inode_ref->inode);

+		if (!ext4_extent_header_get_nentries(hdr)) {

+			/* For empty root we need to make sure that the

+			 * depth of the root level is 0. */

+			ext4_extent_header_set_nentries(hdr, 0);

+			ext4_extent_header_set_depth(hdr, 0);

+			inode_ref->dirty = true;

+		}

+	}

+out:

+	/* Put loaded blocks */

+	ext4_extent_path_release(inode_ref, path);

+	/* Destroy temporary data structure */

+	ext4_free(path);

+	return ret;

+}

+/**@brief Fill a run of physical blocks with zeroes.

+ *

+ * Each block is obtained without being read, cleared, marked dirty

+ * and put back. Processing stops at the first failure.

+ * @param inode_ref   I-node

+ * @param fblock      starting block number to be zeroed

+ * @param nblocks     number of blocks to be zeroed

+ * @return 0 on success, otherwise the error of the failing block

+ * get/set routine */

+static int ext4_extent_zero_fblocks(struct ext4_inode_ref *inode_ref,

+				    ext4_fsblk_t fblock,

+				    ext4_lblk_t nblocks)

+{

+	ext4_fsblk_t cur;

+	ext4_fsblk_t end = fblock + nblocks;

+	u32int bsize = ext4_sb_get_block_size(&inode_ref->fs->sb);

+	for (cur = fblock; cur != end; cur++) {

+		int err;

+		struct ext4_block blk = EXT4_BLOCK_ZERO();

+		err = ext4_trans_block_get_noread(inode_ref->fs->bdev, &blk,

+						  cur);

+		if (err != 0)

+			return err;

+		memset(blk.data, 0, bsize);

+		ext4_trans_set_block_dirty(blk.buf);

+		err = ext4_block_set(inode_ref->fs->bdev, &blk);

+		if (err != 0)

+			return err;

+	}

+	return 0;

+}

+/**@brief Convert unwritten mapping to written one

+ *

+ * The range [\p iblock, \p iblock + \p nblocks) must lie inside the

+ * extent the path currently points to. The blocks of the range are

+ * zeroed, and the extent is split into up to three extents so that

+ * only the requested range becomes written.

+ * @param inode_ref   I-node

+ * @param path        Path in the extent tree

+ * @param iblock      starting logical block to be converted

+ * @param nblocks     number of blocks to be converted

+ * @return Error code */

+int ext4_extent_convert_written(struct ext4_inode_ref *inode_ref,

+				struct ext4_extent_path *path,

+				ext4_lblk_t iblock,

+				ext4_lblk_t nblocks)

+{

+	int ret;

+	ext4_lblk_t eiblock;

+	ext4_lblk_t enblocks;

+	ext4_fsblk_t efblock;

+	struct ext4_extent *ext;

+	ext = path[0].extent;

+	assert(ext);

+	eiblock = ext4_extent_get_iblock(ext);

+	enblocks = ext4_extent_get_nblocks(ext);

+	efblock = ext4_extent_get_fblock(ext);

+	assert(EXT4_EXTENT_IN_RANGE(iblock, eiblock, enblocks));

+	/* There are four cases we need to handle */

+	if (iblock == eiblock && nblocks == enblocks) {

+		/* Case 1: the whole extent has to be converted.

+		 * This is the simplest scenario. We just need

+		 * to mark the extent "written", and zero the

+		 * blocks covered by the extent */

+		ret = ext4_extent_zero_fblocks(inode_ref, efblock, enblocks);

+		if (ret != 0)

+			return ret;

+		EXT4_EXT_SET_WRITTEN(ext);

+		ext4_extent_path_dirty(inode_ref, path, 0);

+	} else if (iblock == eiblock) {

+		/* Case 2: convert the first part of the extent to written

+		 * and insert an unwritten extent after that */

+		ext4_lblk_t newiblock;

+		ext4_lblk_t newnblocks;

+		ext4_fsblk_t newfblock;

+		struct ext4_extent insext;

+		/* The new extent we are going to insert */

+		newiblock = eiblock + nblocks;

+		newnblocks = eiblock + enblocks - newiblock;

+		newfblock = efblock + nblocks;

+		/* Zero the blocks covered by the first part of the extent */

+		ret = ext4_extent_zero_fblocks(inode_ref,

+					       efblock + iblock - eiblock,

+					       nblocks);

+		if (ret != 0)

+			return ret;

+		/* Trim the current extent to the converted part and mark

+		 * it written. The remaining enblocks - nblocks blocks are

+		 * covered by the unwritten extent inserted below, so the

+		 * current extent must keep exactly nblocks blocks. */

+		ext4_extent_set_nblocks(ext, nblocks, false);

+		ext4_extent_path_dirty(inode_ref, path, 0);

+		/* Insert the new extent */

+		ext4_extent_set_iblock(&insext, newiblock);

+		ext4_extent_set_nblocks(&insext, newnblocks, true);

+		ext4_extent_set_fblock(&insext, newfblock);

+		ret = ext4_extent_insert(inode_ref, path, &insext);

+		if (ret != 0)

+			/* In case when something happens during insertion

+			 * we revert the trimming of the current extent,

+			 * i.e. restore its original length and state.

+			 * NOTE(review): this assumes a failed insertion

+			 * leaves the extent where it was - confirm. */

+			ext4_extent_set_nblocks(ext, enblocks, true);

+	} else if (iblock + nblocks == eiblock + enblocks) {

+		/* Case 3: convert the second part of the extent to written.

+		 * We insert an written extent after the current extent */

+		ext4_lblk_t newiblock;

+		ext4_lblk_t newnblocks;

+		ext4_fsblk_t newfblock;

+		struct ext4_extent insext;

+		/* The new extent we are going to insert */

+		newiblock = iblock;

+		newnblocks = nblocks;

+		newfblock = efblock + iblock - eiblock;

+		/* Zero the blocks covered by the second part of the extent */

+		ret = ext4_extent_zero_fblocks(inode_ref, newfblock, newnblocks);

+		if (ret != 0)

+			return ret;

+		/* Trim the current extent */

+		ext4_extent_set_nblocks(ext, enblocks - nblocks, true);

+		ext4_extent_path_dirty(inode_ref, path, 0);

+		/* Insert the new extent */

+		ext4_extent_set_iblock(&insext, newiblock);

+		ext4_extent_set_nblocks(&insext, newnblocks, false);

+		ext4_extent_set_fblock(&insext, newfblock);

+		ret = ext4_extent_insert(inode_ref, path, &insext);

+		if (ret != 0)

+			/* In case when something happens during insertion

+			 * we revert the trimming of the current extent,

+			 * i.e. restore its original length.

+			 * NOTE(review): this assumes a failed insertion

+			 * leaves the extent where it was - confirm. */

+			ext4_extent_set_nblocks(ext, enblocks, true);

+	} else {

+		/* Case 4: convert the middle part of the extent to written.

+		 * We insert one written extent, follow by an unwritten

+		 * extent */

+		ext4_lblk_t newiblock[2];

+		ext4_lblk_t newnblocks[2];

+		ext4_fsblk_t newfblock[2];

+		struct ext4_extent insext;

+		/* The new extents we are going to insert */

+		newiblock[0] = iblock;

+		newnblocks[0] = nblocks;

+		newfblock[0] = efblock + iblock - eiblock;

+		newiblock[1] = iblock + nblocks;

+		newnblocks[1] = eiblock + enblocks - newiblock[1];

+		newfblock[1] = newfblock[0] + nblocks;

+		/* Zero the blocks covered by the written extent */

+		ret = ext4_extent_zero_fblocks(inode_ref, newfblock[0],

+					       newnblocks[0]);

+		if (ret != 0)

+			return ret;

+		/* We don't want to fail in the middle because we

+		 * run out of space. From now on the subsequent

+		 * insertions cannot fail */

+		ret = ext4_extent_split(inode_ref, path, 2);

+		if (ret != 0)

+			return ret;

+		/* The split may have moved the path to a new (right)

+		 * block, so the extent pointer has to be fetched again

+		 * before the extent is modified */

+		ext = path[0].extent;

+		/* Trim the current extent */

+		ext4_extent_set_nblocks(ext,

+					enblocks - newnblocks[0] - newnblocks[1],

+					true);

+		ext4_extent_path_dirty(inode_ref, path, 0);

+		/* Insert the written extent first */

+		ext4_extent_set_iblock(&insext, newiblock[0]);

+		ext4_extent_set_nblocks(&insext, newnblocks[0], false);

+		ext4_extent_set_fblock(&insext, newfblock[0]);

+		ret = ext4_extent_insert(inode_ref, path, &insext);

+		assert(ret == 0);

+		/* Then insert the unwritten extent */

+		ext4_extent_set_iblock(&insext, newiblock[1]);

+		ext4_extent_set_nblocks(&insext , newnblocks[1], true);

+		ext4_extent_set_fblock(&insext, newfblock[1]);

+		ret = ext4_extent_insert(inode_ref, path, &insext);

+		assert(ret == 0);

+	}

+	return ret;

+}

+/**@brief Tell whether @ext2 may be merged onto the end of @ext.

+ *

+ * Both extents must have the same written/unwritten state, @ext2 must

+ * start right where @ext ends both logically and physically, and the

+ * combined length must not exceed the maximum for that state.

+ * @param ext  the first extent

+ * @param ext2 the second extent

+ * @return true if the two extents can be merged, otherwise false */

+static bool ext4_extent_can_append(struct ext4_extent *ext,

+				   struct ext4_extent *ext2)

+{

+	bool unwritten;

+	ext4_lblk_t limit;

+	ext4_lblk_t head_iblock = ext4_extent_get_iblock(ext);

+	ext4_lblk_t head_len = ext4_extent_get_nblocks(ext);

+	ext4_fsblk_t head_fblock = ext4_extent_get_fblock(ext);

+	ext4_lblk_t tail_iblock = ext4_extent_get_iblock(ext2);

+	ext4_lblk_t tail_len = ext4_extent_get_nblocks(ext2);

+	ext4_fsblk_t tail_fblock = ext4_extent_get_fblock(ext2);

+	/* A written and an unwritten extent never merge */

+	if (EXT4_EXT_IS_UNWRITTEN(ext) != EXT4_EXT_IS_UNWRITTEN(ext2))

+		return false;

+	unwritten = EXT4_EXT_IS_UNWRITTEN(ext);

+	/* The logical ranges have to be adjacent ... */

+	if (head_iblock + head_len != tail_iblock)

+		return false;

+	/* ... and so do the physical ranges */

+	if (head_fblock + head_len != tail_fblock)

+		return false;

+	/* The maximum length of an unwritten extent is shorter than

+	 * that of a written extent by one block */

+	if (unwritten)

+		limit = EXT4_EXT_MAX_LEN_UNWRITTEN;

+	else

+		limit = EXT4_EXT_MAX_LEN_WRITTEN;

+	return head_len + tail_len <= limit;

+}

+/**@brief Tell whether @ext2 may be merged onto the front of @ext.

+ *

+ * Both extents must have the same written/unwritten state, @ext2 must

+ * end right where @ext starts both logically and physically, and the

+ * combined length must not exceed the maximum for that state.

+ * @param ext  the first extent

+ * @param ext2 the second extent

+ * @return true if the two extents can be merged, otherwise false */

+static bool ext4_extent_can_prepend(struct ext4_extent *ext,

+				    struct ext4_extent *ext2)

+{

+	bool unwritten;

+	ext4_lblk_t limit;

+	ext4_lblk_t tail_iblock = ext4_extent_get_iblock(ext);

+	ext4_lblk_t tail_len = ext4_extent_get_nblocks(ext);

+	ext4_fsblk_t tail_fblock = ext4_extent_get_fblock(ext);

+	ext4_lblk_t head_iblock = ext4_extent_get_iblock(ext2);

+	ext4_lblk_t head_len = ext4_extent_get_nblocks(ext2);

+	ext4_fsblk_t head_fblock = ext4_extent_get_fblock(ext2);

+	/* A written and an unwritten extent never merge */

+	if (EXT4_EXT_IS_UNWRITTEN(ext) != EXT4_EXT_IS_UNWRITTEN(ext2))

+		return false;

+	unwritten = EXT4_EXT_IS_UNWRITTEN(ext);

+	/* The logical ranges have to be adjacent ... */

+	if (head_iblock + head_len != tail_iblock)

+		return false;

+	/* ... and so do the physical ranges */

+	if (head_fblock + head_len != tail_fblock)

+		return false;

+	/* The maximum length of an unwritten extent is shorter than

+	 * that of a written extent by one block */

+	if (unwritten)

+		limit = EXT4_EXT_MAX_LEN_UNWRITTEN;

+	else

+		limit = EXT4_EXT_MAX_LEN_WRITTEN;

+	return tail_len + head_len <= limit;

+}

+/**@brief Allocate a run of physically contiguous blocks

+ *

+ * The first block is allocated near @goal; the following blocks are

+ * taken one by one right after it for as long as they are free. The

+ * run may therefore be shorter than requested.

+ * @param inode_ref I-node

+ * @param goal      physical block allocation hint

+ * @param nblocks   number of blocks to be allocated

+ * @param fblockp   Output value - starting physical block number

+ * @param nblocksp  Output value - the number of blocks actually

+ * allocated (may be less than @nblocks); may be a null pointer

+ * @return Error code. On error nothing stays allocated and the output

+ * values are left untouched. */

+static int

+ext4_extent_alloc_datablocks(struct ext4_inode_ref *inode_ref,

+			     ext4_fsblk_t goal,

+			     ext4_lblk_t nblocks,

+			     ext4_fsblk_t *fblockp,

+			     ext4_lblk_t *nblocksp)

+{

+	int ret = 0;

+	ext4_lblk_t i;

+	ext4_fsblk_t retfblock = 0;

+	ext4_lblk_t retnblocks = 0;

+	/* retnblocks counts the blocks successfully allocated so far */

+	for (i = 0; i < nblocks; ++i, ++retnblocks) {

+		bool free = false;

+		if (!i) {

+			/* We allocate the first block by using

+			 * ext4_balloc_alloc_block() so that we

+			 * can pass allocation hint to the block

+			 * allocator */

+			ret = ext4_balloc_alloc_block(inode_ref,

+						      goal,

+						      &retfblock);

+			if (ret == 0)

+				free = true;

+		} else {

+			ext4_fsblk_t blockscnt;

+			/* Do a check to make sure that we won't look into

+			 * a block number larger than the total number of

+			 * blocks we have on this filesystem */

+			blockscnt = ext4_sb_get_blocks_cnt(&inode_ref->fs->sb);

+			if (retfblock + i < blockscnt) {

+				ret = ext4_balloc_try_alloc_block(inode_ref,

+				    retfblock + i, &free);

+			} else

+				free = false;

+		}

+		/* Stop trying on the next block if we encounter errors, or

+		 * if there is insufficient space, or if we can't allocate

+		 * blocks continuously */

+		if (ret != 0 || !free)

+			break;

+	}

+	if (ret != 0) {

+		/* Do not leak the blocks taken before the failure. This

+		 * is best effort: we already have an error to report. */

+		if (retnblocks)

+			ext4_balloc_free_blocks(inode_ref, retfblock,

+						retnblocks);

+		return ret;

+	}

+	*fblockp = retfblock;

+	/* Report what was really allocated, which is less than the

+	 * request when the contiguous run ended early */

+	if (nblocksp)

+		*nblocksp = retnblocks;

+	return ret;

+}

+/**@brief Extent-based blockmap manipulation

+ *

+ * Looks up the mapping of @iblock. An existing mapping is returned

+ * (an unwritten one is converted to written first when @create is

+ * set, or reported as a hole otherwise). A missing mapping is reported

+ * as a one-block hole, or, when @create is set, filled by allocating

+ * blocks and merging them into the neighbouring extent or inserting a

+ * new extent.

+ * @param inode_ref   I-node

+ * @param iblock      starting logical block of the inode

+ * @param max_nblocks maximum number of blocks to get from/allocate to blockmap

+ * @param resfblockp  return physical block address of the first block of an

+ * extent (0 for a hole); may be a null pointer

+ * @param create      true if caller wants to insert mapping or convert

+ * unwritten mapping to written one

+ * @param resnblocksp return number of blocks in an extent (at most

+ * \p max_nblocks); may be a null pointer

+ * @return Error code; the output values are only set on success */

+int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref,

+			   ext4_lblk_t iblock,

+			   ext4_lblk_t max_nblocks,

+			   ext4_fsblk_t *resfblockp,

+			   bool create,

+			   ext4_lblk_t *resnblocksp)

+{

+	int ret;

+	struct ext4_extent_path *path;

+	struct ext4_extent *ext;

+	struct ext4_extent insext;

+	ext4_lblk_t eiblock;

+	ext4_lblk_t enblocks;

+	ext4_fsblk_t efblock;

+	ext4_fsblk_t resfblock;

+	ext4_lblk_t resnblocks = 0;

+	ext4_fsblk_t goal;

+	/* Seek to the corresponding extent */

+	ret = ext4_extent_find_extent(inode_ref, iblock, &path);

+	if (ret != 0)

+		return ret;

+	ext = path[0].extent;

+	if (ext) {

+		/* The extent tree is not empty */

+		eiblock = ext4_extent_get_iblock(ext);

+		enblocks = ext4_extent_get_nblocks(ext);

+		efblock = ext4_extent_get_fblock(ext);

+		if (EXT4_EXTENT_IN_RANGE(iblock, eiblock, enblocks)) {

+			/* The extent exists and logical block requested falls

+			 * into the range of the extent */

+			resfblock = efblock + iblock - eiblock;

+			resnblocks = eiblock + enblocks - iblock;

+			/* Trim the result if it is larger than the maximum

+			 * length the caller wants */

+			if (resnblocks > max_nblocks)

+				resnblocks = max_nblocks;

+			if (EXT4_EXT_IS_UNWRITTEN(ext)) {

+				if (create)

+					/* Convert the extent to written extent

+					 * if the extent is unwritten extent */

+					ret = ext4_extent_convert_written(inode_ref,

+									  path,

+									  iblock,

+									  resnblocks);

+				else

+					/* We are not asked to modify the blockmap

+					 * so we just return a hole */

+					resfblock = 0;

+			}

+			goto cleanup;

+		}

+		if (!create) {

+			/* Don't waste time on finding the next extent if we

+			 * are not asked to insert mapping, just return a

+			 * hole */

+			resfblock = 0;

+			resnblocks = 1;

+			goto cleanup;

+		}

+		if (ext4_extent_get_iblock(ext) < iblock) {

+			/* Since the logical block of current extent is smaller

+			 * the requested logical block, we seek to the next

+			 * extent to find the maximum number of blocks we can

+			 * allocate without hitting the starting logical block

+			 * of the next extent */

+			bool nonext;

+			/* Go to the next extent */

+			ret = ext4_extent_increment(inode_ref, path, &nonext);

+			if (ret != 0)

+				goto cleanup;

+			if (!nonext) {

+				/* We successfully reach the next extent */

+				bool noprev;

+				ext4_lblk_t neiblock;

+				ext = path[0].extent;

+				/* The next extent must start at greater logical

+				 * block number */

+				assert(ext4_extent_get_iblock(ext) >

+				    iblock);

+				/* Calculate the maximum number of blocks we

+				 * can allocate without overlapping with the

+				 * next extent */

+				neiblock = ext4_extent_get_iblock(ext);

+				if (max_nblocks > neiblock - iblock)

+					max_nblocks = neiblock - iblock;

+				/* Go back to the previous extent */

+				ret = ext4_extent_decrement(inode_ref, path,

+							    &noprev);

+				if (ret != 0)

+					goto cleanup;

+				assert(!noprev);

+				ext = path[0].extent;

+			}

+		} else if (eiblock > iblock &&

+			   max_nblocks > eiblock - iblock) {

+			/* The extent we found starts after the requested

+			 * logical block, so the new mapping must end before

+			 * that extent begins, otherwise the two would

+			 * overlap */

+			max_nblocks = eiblock - iblock;

+		}

+	}

+	/* Return a hole if we are not asked to insert mapping */

+	if (!create) {

+		resfblock = 0;

+		resnblocks = 1;

+		goto cleanup;

+	}

+	/* Multiple data blocks allocation; max_nblocks is overwritten

+	 * with the block count reported by the allocator */

+	goal = ext4_extent_data_alloc_goal(inode_ref, path, iblock);

+	ret = ext4_extent_alloc_datablocks(inode_ref, goal, max_nblocks,

+					   &resfblock, &max_nblocks);

+	if (ret != 0)

+		goto cleanup;

+	ext4_extent_set_iblock(&insext, iblock);

+	ext4_extent_set_nblocks(&insext, max_nblocks, false);

+	ext4_extent_set_fblock(&insext, resfblock);

+	if (ext && ext4_extent_can_append(ext, &insext)) {

+		/* Clang won't complain, it's just to make gcc happy */

+		enblocks = ext4_extent_get_nblocks(ext);

+		/* If we can append this extent to the current extent */

+		ext4_extent_set_nblocks(ext, enblocks + max_nblocks,

+					EXT4_EXT_IS_UNWRITTEN(ext));

+		ext4_extent_path_dirty(inode_ref, path, 0);

+	} else if (ext && ext4_extent_can_prepend(ext, &insext)) {

+		/* Clang won't complain, it's just to make gcc happy */

+		enblocks = ext4_extent_get_nblocks(ext);

+		/* If we can prepend this extent to the current extent */

+		ext4_extent_set_iblock(ext, iblock);

+		ext4_extent_set_nblocks(ext, enblocks + max_nblocks,

+					EXT4_EXT_IS_UNWRITTEN(ext));

+		ext4_extent_set_fblock(ext, resfblock);

+		/* If we are working on the first extent in the

+		 * first leaf (in case we are actually prepending

+		 * mappings) we need to update the index of nodes.

+		 *

+		 * NOTE: Since we don't seek to the next extent and

+		 * try to modify it, prepending should not happen at

+		 * any leaves except the first extent of the first leaf */

+		ext4_extent_update_index(inode_ref, path, false);

+		ext4_extent_path_dirty(inode_ref, path, 0);

+	} else {

+		/* Finally, insert a new extent into the extent tree */

+		ret = ext4_extent_insert(inode_ref, path, &insext);

+		if (ret != 0)

+			ext4_balloc_free_blocks(inode_ref, resfblock,

+						max_nblocks);

+	}

+	resnblocks = max_nblocks;

+cleanup:

+	/* Put loaded blocks */

+	ext4_extent_path_release(inode_ref, path);

+	/* Destroy temporary data structure */

+	ext4_free(path);

+	if (ret == 0) {

+		if (resfblockp)

+			*resfblockp = resfblock;

+		if (resnblocksp)

+			*resnblocksp = resnblocks;

+	}

+	return ret;

+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_fs.c

@@ -1,0 +1,1699 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_trans.h"

+#include "ext4_fs.h"

+#include "ext4_blockdev.h"

+#include "ext4_super.h"

+#include "ext4_crc32.h"

+#include "ext4_block_group.h"

+#include "ext4_balloc.h"

+#include "ext4_bitmap.h"

+#include "ext4_inode.h"

+#include "ext4_ialloc.h"

+#include "ext4_extent.h"

+/**@brief Initialize a filesystem descriptor from the superblock of a device.
+ * @param fs        Filesystem descriptor to fill in
+ * @param bdev      Block device holding the filesystem
+ * @param read_only Caller's request to open the filesystem read-only
+ * @return 0 on success. On failure either the result of the failing
+ *         superblock read/write or feature check, or -1 with the error
+ *         string set by werrstr().
+ */
+int ext4_fs_init(struct ext4_fs *fs, struct ext4_blockdev *bdev,
+		 bool read_only)
+{
+	int r, i;
+	u16int tmp;
+	u32int bsize;
+
+	assert(fs && bdev);
+
+	fs->bdev = bdev;
+	fs->read_only = read_only;
+
+	r = ext4_sb_read(fs->bdev, &fs->sb);
+	if (r != 0)
+		return r;
+
+	if (!ext4_sb_check(&fs->sb)) {
+		werrstr("superblock: %r");
+		return -1;
+	}
+
+	bsize = ext4_sb_get_block_size(&fs->sb);
+	if (bsize > EXT4_MAX_BLOCK_SIZE) {
+		/* bsize is unsigned, so use the unsigned verb as the rest of
+		 * the file does for u32int values */
+		werrstr("invalid block size: %ud", bsize);
+		return -1;
+	}
+
+	/* The feature check overwrites the local read_only flag; it is only
+	 * folded back into fs->read_only when it comes back true */
+	r = ext4_fs_check_features(fs, &read_only);
+	if (r != 0)
+		return r;
+
+	if (read_only)
+		fs->read_only = read_only;
+
+	/* Compute limits for indirect block levels */
+	u32int blocks_id = bsize / sizeof(u32int);
+
+	fs->inode_block_limits[0] = EXT4_INODE_DIRECT_BLOCK_COUNT;
+	fs->inode_blocks_per_level[0] = 1;
+
+	for (i = 1; i < 4; i++) {
+		fs->inode_blocks_per_level[i] =
+		    fs->inode_blocks_per_level[i - 1] * blocks_id;
+		fs->inode_block_limits[i] = fs->inode_block_limits[i - 1] +
+					    fs->inode_blocks_per_level[i];
+	}
+
+	/* Validate FS: only warn if the error flag is set in the superblock */
+	tmp = ext4_get16(&fs->sb, state);
+	if (tmp & EXT4_SUPERBLOCK_STATE_ERROR_FS)
+		ext4_dbg(DEBUG_FS, DBG_WARN
+				"last umount error: superblock fs_error flag\n");
+
+	if (!fs->read_only) {
+		/* Mark system as mounted */
+		ext4_set16(&fs->sb, state, EXT4_SUPERBLOCK_STATE_ERROR_FS);
+		r = ext4_sb_write(fs->bdev, &fs->sb);
+		if (r != 0)
+			return r;
+
+		/* Update mount count (in the in-memory superblock, after the
+		 * write above) */
+		ext4_set16(&fs->sb, mount_count, ext4_get16(&fs->sb, mount_count) + 1);
+	}
+
+	return r;
+}

+/**@brief Flag the in-memory superblock as valid and, unless the filesystem
+ *        is read-only, write it back to the device.
+ * @param fs Filesystem descriptor
+ * @return 0 for a read-only filesystem, otherwise the result of
+ *         ext4_sb_write()
+ */
+int ext4_fs_fini(struct ext4_fs *fs)
+{
+	assert(fs);
+
+	/* The state field is updated in memory regardless of the mount mode */
+	ext4_set16(&fs->sb, state, EXT4_SUPERBLOCK_STATE_VALID_FS);
+
+	if (fs->read_only)
+		return 0;
+
+	return ext4_sb_write(fs->bdev, &fs->sb);
+}

+/**@brief Log the name of every incompatible-feature bit that is set.
+ * @param features_incompatible Bit mask of EXT4_FINCOM_* flags
+ */
+static void ext4_fs_debug_features_inc(u32int features_incompatible)
+{
+	if (features_incompatible & EXT4_FINCOM_COMPRESSION)
+		ext4_dbg(DEBUG_FS, DBG_NONE "compression\n");
+	if (features_incompatible & EXT4_FINCOM_FILETYPE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "filetype\n");
+	if (features_incompatible & EXT4_FINCOM_RECOVER)
+		ext4_dbg(DEBUG_FS, DBG_NONE "recover\n");
+	if (features_incompatible & EXT4_FINCOM_JOURNAL_DEV)
+		ext4_dbg(DEBUG_FS, DBG_NONE "journal_dev\n");
+	if (features_incompatible & EXT4_FINCOM_META_BG)
+		ext4_dbg(DEBUG_FS, DBG_NONE "meta_bg\n");
+	if (features_incompatible & EXT4_FINCOM_EXTENTS)
+		ext4_dbg(DEBUG_FS, DBG_NONE "extents\n");
+	if (features_incompatible & EXT4_FINCOM_64BIT)
+		ext4_dbg(DEBUG_FS, DBG_NONE "64bit\n");
+	/* Label matches the flag name (was misspelled "mnp") */
+	if (features_incompatible & EXT4_FINCOM_MMP)
+		ext4_dbg(DEBUG_FS, DBG_NONE "mmp\n");
+	if (features_incompatible & EXT4_FINCOM_FLEX_BG)
+		ext4_dbg(DEBUG_FS, DBG_NONE "flex_bg\n");
+	if (features_incompatible & EXT4_FINCOM_EA_INODE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "ea_inode\n");
+	if (features_incompatible & EXT4_FINCOM_DIRDATA)
+		ext4_dbg(DEBUG_FS, DBG_NONE "dirdata\n");
+	if (features_incompatible & EXT4_FINCOM_BG_USE_META_CSUM)
+		ext4_dbg(DEBUG_FS, DBG_NONE "meta_csum\n");
+	if (features_incompatible & EXT4_FINCOM_LARGEDIR)
+		ext4_dbg(DEBUG_FS, DBG_NONE "largedir\n");
+	if (features_incompatible & EXT4_FINCOM_INLINE_DATA)
+		ext4_dbg(DEBUG_FS, DBG_NONE "inline_data\n");
+}

+/**@brief Log the name of every compatible-feature bit that is set.
+ * @param features_compatible Bit mask of EXT4_FCOM_* flags
+ */
+static void ext4_fs_debug_features_comp(u32int features_compatible)
+{
+	if ((features_compatible & EXT4_FCOM_DIR_PREALLOC) != 0) {
+		ext4_dbg(DEBUG_FS, DBG_NONE "dir_prealloc\n");
+	}
+	if ((features_compatible & EXT4_FCOM_IMAGIC_INODES) != 0) {
+		ext4_dbg(DEBUG_FS, DBG_NONE "imagic_inodes\n");
+	}
+	if ((features_compatible & EXT4_FCOM_HAS_JOURNAL) != 0) {
+		ext4_dbg(DEBUG_FS, DBG_NONE "has_journal\n");
+	}
+	if ((features_compatible & EXT4_FCOM_EXT_ATTR) != 0) {
+		ext4_dbg(DEBUG_FS, DBG_NONE "ext_attr\n");
+	}
+	if ((features_compatible & EXT4_FCOM_RESIZE_INODE) != 0) {
+		ext4_dbg(DEBUG_FS, DBG_NONE "resize_inode\n");
+	}
+	if ((features_compatible & EXT4_FCOM_DIR_INDEX) != 0) {
+		ext4_dbg(DEBUG_FS, DBG_NONE "dir_index\n");
+	}
+}

+/**@brief Log the name of every read-only-compatible feature bit that is set.
+ * @param features_ro Bit mask of EXT4_FRO_COM_* flags
+ */
+static void ext4_fs_debug_features_ro(u32int features_ro)
+{
+	if (features_ro & EXT4_FRO_COM_SPARSE_SUPER)
+		ext4_dbg(DEBUG_FS, DBG_NONE "sparse_super\n");
+	if (features_ro & EXT4_FRO_COM_LARGE_FILE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "large_file\n");
+	if (features_ro & EXT4_FRO_COM_BTREE_DIR)
+		ext4_dbg(DEBUG_FS, DBG_NONE "btree_dir\n");
+	if (features_ro & EXT4_FRO_COM_HUGE_FILE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "huge_file\n");
+	/* Label matches the flag name (was misspelled "gtd_csum") */
+	if (features_ro & EXT4_FRO_COM_GDT_CSUM)
+		ext4_dbg(DEBUG_FS, DBG_NONE "gdt_csum\n");
+	if (features_ro & EXT4_FRO_COM_DIR_NLINK)
+		ext4_dbg(DEBUG_FS, DBG_NONE "dir_nlink\n");
+	if (features_ro & EXT4_FRO_COM_EXTRA_ISIZE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "extra_isize\n");
+	if (features_ro & EXT4_FRO_COM_QUOTA)
+		ext4_dbg(DEBUG_FS, DBG_NONE "quota\n");
+	if (features_ro & EXT4_FRO_COM_BIGALLOC)
+		ext4_dbg(DEBUG_FS, DBG_NONE "bigalloc\n");
+	if (features_ro & EXT4_FRO_COM_METADATA_CSUM)
+		ext4_dbg(DEBUG_FS, DBG_NONE "metadata_csum\n");
+}

+/**@brief Check the superblock feature sets against what this code supports.
+ * @param fs        Filesystem whose superblock is inspected
+ * @param read_only Output: set to true when unsupported read-only-compatible
+ *                  features are present, false otherwise
+ * @return 0 on success, -1 (with the error string set) when an unsupported
+ *         incompatible feature is present
+ */
+int ext4_fs_check_features(struct ext4_fs *fs, bool *read_only)
+{
+	u32int unsupported;
+
+	assert(fs && read_only);
+
+	/* Revision 0: no feature checks are performed */
+	if (ext4_get32(&fs->sb, rev_level) == 0) {
+		*read_only = false;
+		return 0;
+	}
+
+	ext4_dbg(DEBUG_FS, DBG_INFO "sblock features_incompatible:\n");
+	ext4_fs_debug_features_inc(ext4_get32(&fs->sb, features_incompatible));
+
+	ext4_dbg(DEBUG_FS, DBG_INFO "sblock features_compatible:\n");
+	ext4_fs_debug_features_comp(ext4_get32(&fs->sb, features_compatible));
+
+	ext4_dbg(DEBUG_FS, DBG_INFO "sblock features_read_only:\n");
+	ext4_fs_debug_features_ro(ext4_get32(&fs->sb, features_read_only));
+
+	/* Incompatible features that are neither supported nor ignored are
+	 * fatal */
+	unsupported = ext4_get32(&fs->sb, features_incompatible);
+	unsupported &= ~(EXT4_SUPPORTED_FINCOM | EXT_FINCOM_IGNORED);
+	if (unsupported != 0) {
+		ext4_dbg(DEBUG_FS, DBG_ERROR
+				"sblock has unsupported features incompatible:\n");
+		ext4_fs_debug_features_inc(unsupported);
+		werrstr("unsupported features");
+		return -1;
+	}
+
+	/* Unsupported read-only-compatible features only force read-only */
+	unsupported = ext4_get32(&fs->sb, features_read_only) &
+		      ~EXT4_SUPPORTED_FRO_COM;
+	if (unsupported == 0) {
+		*read_only = false;
+		return 0;
+	}
+
+	ext4_dbg(DEBUG_FS, DBG_WARN
+		"sblock has unsupported features read only:\n");
+	ext4_fs_debug_features_ro(unsupported);
+	*read_only = true;
+	return 0;
+}

+/**@brief Determine whether the block is inside the group.
+ * @param s       superblock
+ * @param baddr   block address
+ * @param bgid    block group id
+ * @return true when ext4_balloc_get_bgid_of_block() maps baddr to bgid,
+ *         false otherwise
+ */
+static bool ext4_block_in_group(struct ext4_sblock *s, ext4_fsblk_t baddr,
+			        u32int bgid)
+{
+	u32int owner = ext4_balloc_get_bgid_of_block(s, baddr);
+
+	return (owner == bgid) ? true : false;
+}

+/**@brief   Set the bits of a bitmap from start_bit up to end_bit.
+ *          Bits are set one at a time only up to the next byte boundary;
+ *          from there on whole bytes are filled with memset().
+ * @param start_bit First bit to set
+ * @param end_bit   Bit index the filled range is measured against
+ * @param bitmap    Bitmap buffer
+ */
+static void ext4_fs_mark_bitmap_end(int start_bit, int end_bit, void *bitmap)
+{
+	unsigned long byte_edge;
+	int bit;
+
+	if (start_bit >= end_bit)
+		return;
+
+	/* start_bit rounded up to a multiple of 8 */
+	byte_edge = (start_bit + 7) & ~7UL;
+
+	bit = start_bit;
+	while ((unsigned)bit < byte_edge) {
+		ext4_bmap_bit_set(bitmap, bit);
+		bit++;
+	}
+
+	/* Fill the remaining whole bytes; (end_bit - bit) >> 3 rounds down */
+	if (bit < end_bit)
+		memset((char *)bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
+}

+/**@brief Initialize block bitmap in block group.
+ *
+ * Zeroes the bitmap block, then sets the bits for: the first bit_max blocks
+ * of the group, the block bitmap, the inode bitmap and the inode table
+ * blocks (each only when flex_bg is off or the block lies in this group),
+ * and every bit from the group's block count up to block_size * 8.
+ * Finally stores the bitmap checksum in the descriptor and marks the group
+ * reference dirty.
+ *
+ * @param bg_ref Reference to block group
+ * @return Result of ext4_trans_block_get_noread() if it fails, otherwise
+ *         the result of ext4_block_set() on the bitmap block
+ */
+static int ext4_fs_init_block_bitmap(struct ext4_block_group_ref *bg_ref)
+{
+	struct ext4_sblock *sb = &bg_ref->fs->sb;
+	struct ext4_bgroup *bg = bg_ref->block_group;
+	int rc;
+	u32int bit, bit_max;
+	u32int group_blocks;
+	u16int inode_size = ext4_get16(sb, inode_size);
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u32int inodes_per_group = ext4_get32(sb, inodes_per_group);
+	ext4_fsblk_t i;
+	ext4_fsblk_t bmp_blk = ext4_bg_get_block_bitmap(bg, sb);
+	ext4_fsblk_t bmp_inode = ext4_bg_get_inode_bitmap(bg, sb);
+	ext4_fsblk_t inode_table = ext4_bg_get_inode_table_first_block(bg, sb);
+	ext4_fsblk_t first_bg = ext4_balloc_get_block_of_bgid(sb, bg_ref->index);
+	u32int dsc_per_block =  block_size / ext4_sb_get_desc_size(sb);
+	bool flex_bg = ext4_sb_feature_incom(sb, EXT4_FINCOM_FLEX_BG);
+	bool meta_bg = ext4_sb_feature_incom(sb, EXT4_FINCOM_META_BG);
+	/* Number of blocks occupied by this group's inode table */
+	u32int inode_table_bcnt = inodes_per_group * inode_size / block_size;
+	struct ext4_block block_bitmap;
+	/* The old contents are not needed: the block is fully rewritten below */
+	rc = ext4_trans_block_get_noread(bg_ref->fs->bdev, &block_bitmap, bmp_blk);
+	if (rc != 0)
+		return rc;
+	memset(block_bitmap.data, 0, block_size);
+	/* bit_max counts the leading blocks of the group to mark as used; it
+	 * starts from the result of ext4_sb_is_super_in_bg() */
+	bit_max = ext4_sb_is_super_in_bg(sb, bg_ref->index);
+	/* Groups with an index below this count use the non-META_BG branch */
+	u32int count = ext4_sb_first_meta_bg(sb) * dsc_per_block;
+	if (!meta_bg || bg_ref->index < count) {
+		if (bit_max) {
+			bit_max += ext4_bg_num_gdb(sb, bg_ref->index);
+			bit_max += ext4_get16(sb, s_reserved_gdt_blocks);
+		}
+	} else { /* For META_BG_BLOCK_GROUPS */
+		bit_max += ext4_bg_num_gdb(sb, bg_ref->index);
+	}
+	for (bit = 0; bit < bit_max; bit++)
+		ext4_bmap_bit_set(block_bitmap.data, bit);
+	if (bg_ref->index == ext4_block_group_cnt(sb) - 1) {
+		/*
+		 * Even though mke2fs always initialize first and last group
+		 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
+		 * to make sure we calculate the right free blocks
+		 *
+		 * Last group: total block count minus first_data_block minus
+		 * the blocks covered by all preceding groups.
+		 */
+		group_blocks = (u32int)(ext4_sb_get_blocks_cnt(sb) -
+					  ext4_get32(sb, first_data_block) -
+					  ext4_get32(sb, blocks_per_group) *
+					  (ext4_block_group_cnt(sb) - 1));
+	} else {
+		group_blocks = ext4_get32(sb, blocks_per_group);
+	}
+	bool in_bg;
+	/* Bit positions below are relative to first_bg, the group's first block */
+	in_bg = ext4_block_in_group(sb, bmp_blk, bg_ref->index);
+	if (!flex_bg || in_bg)
+		ext4_bmap_bit_set(block_bitmap.data,
+				  (u32int)(bmp_blk - first_bg));
+	in_bg = ext4_block_in_group(sb, bmp_inode, bg_ref->index);
+	if (!flex_bg || in_bg)
+		ext4_bmap_bit_set(block_bitmap.data,
+				  (u32int)(bmp_inode - first_bg));
+        for (i = inode_table; i < inode_table + inode_table_bcnt; i++) {
+		in_bg = ext4_block_in_group(sb, i, bg_ref->index);
+		if (!flex_bg || in_bg)
+			ext4_bmap_bit_set(block_bitmap.data,
+					  (u32int)(i - first_bg));
+	}
+        /*
+         * Also if the number of blocks within the group is
+         * less than the blocksize * 8 ( which is the size
+         * of bitmap ), set rest of the block bitmap to 1
+         */
+        ext4_fs_mark_bitmap_end(group_blocks, block_size * 8, block_bitmap.data);
+	ext4_trans_set_block_dirty(block_bitmap.buf);
+	/* Record the bitmap checksum in the group descriptor; the descriptor
+	 * reference is flagged dirty so the caller writes it back */
+	ext4_balloc_set_bitmap_csum(sb, bg_ref->block_group, block_bitmap.data);
+	bg_ref->dirty = true;
+	/* Save bitmap */
+	return ext4_block_set(bg_ref->fs->bdev, &block_bitmap);
+}

+/**@brief Initialize i-node bitmap in block group.
+ *
+ * Clears the bytes covering inodes_per_group bits, sets every bit from
+ * inodes_per_group up to block_size * 8, stores the bitmap checksum in the
+ * descriptor and marks the group reference dirty.
+ *
+ * @param bg_ref Reference to block group
+ * @return Result of ext4_trans_block_get_noread() if it fails, otherwise
+ *         the result of ext4_block_set() on the bitmap block
+ */
+static int ext4_fs_init_inode_bitmap(struct ext4_block_group_ref *bg_ref)
+{
+	struct ext4_sblock *sb = &bg_ref->fs->sb;
+	struct ext4_bgroup *bg = bg_ref->block_group;
+	ext4_fsblk_t bmp_addr = ext4_bg_get_inode_bitmap(bg, sb);
+	struct ext4_block bmp;
+	u32int block_size;
+	u32int inodes_per_group;
+	u32int total_bits;
+	u32int bit;
+	int rc;
+
+	/* The block is rewritten, so its old contents are not read */
+	rc = ext4_trans_block_get_noread(bg_ref->fs->bdev, &bmp, bmp_addr);
+	if (rc != 0)
+		return rc;
+
+	block_size = ext4_sb_get_block_size(sb);
+	inodes_per_group = ext4_get32(sb, inodes_per_group);
+	total_bits = block_size * 8;
+
+	/* Bits for existing inodes start out free */
+	memset(bmp.data, 0, (inodes_per_group + 7) / 8);
+
+	/* Bits past the last inode: single bits up to the next byte boundary,
+	 * whole bytes after that */
+	bit = inodes_per_group;
+	while (bit < ((inodes_per_group + 7) & ~7UL)) {
+		ext4_bmap_bit_set(bmp.data, bit);
+		bit++;
+	}
+	if (bit < total_bits)
+		memset(bmp.data + (bit >> 3), 0xff, (total_bits - bit) >> 3);
+
+	ext4_trans_set_block_dirty(bmp.buf);
+	ext4_ialloc_set_bitmap_csum(sb, bg, bmp.data);
+	bg_ref->dirty = true;
+
+	/* Save bitmap */
+	return ext4_block_set(bg_ref->fs->bdev, &bmp);
+}

+/**@brief Initialize i-node table in block group.
+ *
+ * Zero-fills every block of the group's inode table and marks each one
+ * dirty.
+ *
+ * @param bg_ref Reference to block group
+ * @return 0 on success, otherwise the result of the first failing
+ *         ext4_trans_block_get_noread() or ext4_block_set() call
+ */
+static int ext4_fs_init_inode_table(struct ext4_block_group_ref *bg_ref)
+{
+	struct ext4_sblock *sb = &bg_ref->fs->sb;
+	struct ext4_bgroup *bg = bg_ref->block_group;
+	u32int inode_size = ext4_get16(sb, inode_size);
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u32int per_block = block_size / inode_size;
+	u32int in_group = ext4_inodes_in_group_cnt(sb, bg_ref->index);
+	u32int table_blocks;
+	ext4_fsblk_t first, last, cur;
+	struct ext4_block blk;
+	int rc;
+
+	/* Round the block count up when the inodes do not fill the last block */
+	table_blocks = in_group / per_block;
+	if ((in_group % per_block) != 0)
+		table_blocks += 1;
+
+	/* Compute initialization bounds (inclusive) */
+	first = ext4_bg_get_inode_table_first_block(bg, sb);
+	last = first + table_blocks - 1;
+
+	for (cur = first; cur <= last; cur++) {
+		rc = ext4_trans_block_get_noread(bg_ref->fs->bdev, &blk, cur);
+		if (rc != 0)
+			return rc;
+
+		memset(blk.data, 0, block_size);
+		ext4_trans_set_block_dirty(blk.buf);
+
+		rc = ext4_block_set(bg_ref->fs->bdev, &blk);
+		if (rc != 0)
+			return rc;
+	}
+
+	return 0;
+}

+/**@brief Compute the block that holds the descriptor of a block group.
+ * @param s             Superblock
+ * @param bgid          Block group index
+ * @param dsc_per_block Number of group descriptors per block
+ * @return Block address of the descriptor block
+ */
+static ext4_fsblk_t ext4_fs_get_descriptor_block(struct ext4_sblock *s,
+					     u32int bgid,
+					     u32int dsc_per_block)
+{
+	u32int gdt_idx = bgid / dsc_per_block;
+	u32int meta_start = ext4_sb_first_meta_bg(s);
+	bool meta_bg = ext4_sb_feature_incom(s, EXT4_FINCOM_META_BG);
+	int skip_super;
+
+	if (meta_bg && gdt_idx >= meta_start) {
+		/* META_BG layout: the descriptor block sits at the start of
+		 * the group, one block further on when the group holds a
+		 * superblock copy */
+		skip_super = ext4_sb_is_super_in_bg(s, bgid) ? 1 : 0;
+		return (skip_super + ext4_fs_first_bg_block_no(s, bgid));
+	}
+
+	/* Otherwise descriptor blocks follow first_data_block + 1 in order */
+	return ext4_get32(s, first_data_block) + gdt_idx + 1;
+}

+/**@brief  Compute checksum of block group descriptor.
+ *
+ * With METADATA_CSUM the result is the low 16 bits of a crc32c over the
+ * filesystem uuid, the little-endian group index and the descriptor (with
+ * its checksum field temporarily zeroed). Otherwise, with GDT_CSUM, it is a
+ * crc16 over the uuid, the little-endian group index and the descriptor
+ * with the checksum field skipped. With neither feature the result is 0.
+ *
+ * @param sb   Superblock
+ * @param bgid Index of block group in the filesystem
+ * @param bg   Block group to compute checksum for
+ * @return Checksum value
+ */
+static u16int ext4_fs_bg_checksum(struct ext4_sblock *sb, u32int bgid,
+				    struct ext4_bgroup *bg)
+{
+	/* If checksum not supported, 0 will be returned */
+	u16int crc = 0;
+	/* Compute the checksum only if the filesystem supports it */
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		/* Use metadata_csum algorithm instead */
+		u32int le32_bgid = to_le32(bgid);
+		u32int orig_checksum, checksum;
+		/* Preparation: temporarily set bg checksum to 0 */
+		orig_checksum = bg->checksum;
+		bg->checksum = 0;
+		/* First calculate crc32 checksum against fs uuid */
+		checksum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid,
+				sizeof(sb->uuid));
+		/* Then calculate crc32 checksum against bgid */
+		checksum = ext4_crc32c(checksum, &le32_bgid, sizeof(bgid));
+		/* Finally calculate crc32 checksum against block_group_desc */
+		checksum = ext4_crc32c(checksum, bg, ext4_sb_get_desc_size(sb));
+		/* Restore the stored checksum that was zeroed above */
+		bg->checksum = orig_checksum;
+		/* Only the low 16 bits are used for the descriptor */
+		crc = checksum & 0xFFFF;
+		return crc;
+	}
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_GDT_CSUM)) {
+		u8int *base = (u8int *)bg;
+		u8int *checksum = (u8int *)&bg->checksum;
+		/* Byte offset of the checksum field inside the descriptor */
+		u32int offset = (u32int)(checksum - base);
+		/* Convert block group index to little endian */
+		u32int group = to_le32(bgid);
+		/* Initialization */
+		crc = ext4_bg_crc16(~0, sb->uuid, sizeof(sb->uuid));
+		/* Include index of block group */
+		crc = ext4_bg_crc16(crc, (u8int *)&group, sizeof(group));
+		/* Compute crc from the first part (stop before checksum field)
+		 */
+		crc = ext4_bg_crc16(crc, (u8int *)bg, offset);
+		/* Skip checksum */
+		offset += sizeof(bg->checksum);
+		/* Checksum of the rest of block group descriptor; only done
+		 * with the 64BIT feature and when bytes remain after the
+		 * checksum field */
+		if ((ext4_sb_feature_incom(sb, EXT4_FINCOM_64BIT)) &&
+		    (offset < ext4_sb_get_desc_size(sb))) {
+			const u8int *start = ((u8int *)bg) + offset;
+			usize len = ext4_sb_get_desc_size(sb) - offset;
+			crc = ext4_bg_crc16(crc, start, len);
+		}
+	}
+	return crc;
+}

+/**@brief Verify the checksum stored in a block group descriptor.
+ *
+ * The check is only performed when the METADATA_CSUM feature is set;
+ * otherwise the descriptor is accepted as is.
+ *
+ * @param sb   Superblock
+ * @param bgid Index of block group in the filesystem
+ * @param bg   Block group descriptor to verify
+ * @return true when no check is performed or the checksums match
+ */
+static bool ext4_fs_verify_bg_csum(struct ext4_sblock *sb,
+				   u32int bgid,
+				   struct ext4_bgroup *bg)
+{
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		return ext4_fs_bg_checksum(sb, bgid, bg) ==
+		       to_le16(bg->checksum);
+	}
+
+	return true;
+}

+/**@brief Load a block group descriptor and build a reference to it.
+ *
+ * A checksum mismatch is only logged. If the descriptor carries the
+ * BLOCK_UNINIT or INODE_UNINIT flag, the corresponding bitmap (and, when
+ * ITABLE_ZEROED is not set, the inode table) is initialized here, the flag
+ * is updated and the reference is marked dirty.
+ *
+ * @param fs   Filesystem
+ * @param bgid Index of the block group
+ * @param ref  Reference to fill in; on success it must be released with
+ *             ext4_fs_put_block_group_ref()
+ * @return 0 on success, otherwise the result of the failing call; on
+ *         failure the descriptor block has already been released
+ */
+int ext4_fs_get_block_group_ref(struct ext4_fs *fs, u32int bgid,
+				struct ext4_block_group_ref *ref)
+{
+	struct ext4_bgroup *bg;
+	int rc;
+
+	/* Compute number of descriptors, that fits in one data block */
+	u32int block_size = ext4_sb_get_block_size(&fs->sb);
+	u32int dsc_size = ext4_sb_get_desc_size(&fs->sb);
+	u32int dsc_cnt = block_size / dsc_size;
+
+	/* Locate the descriptor block and the descriptor's offset inside it */
+	u64int block_id = ext4_fs_get_descriptor_block(&fs->sb, bgid, dsc_cnt);
+	u32int offset = (bgid % dsc_cnt) * dsc_size;
+
+	rc = ext4_trans_block_get(fs->bdev, &ref->block, block_id);
+	if (rc != 0)
+		return rc;
+
+	ref->block_group = (void *)(ref->block.data + offset);
+	ref->fs = fs;
+	ref->index = bgid;
+	ref->dirty = false;
+	bg = ref->block_group;
+
+	if (!ext4_fs_verify_bg_csum(&fs->sb, bgid, bg)) {
+		/* The two literals are concatenated: keep a separating space */
+		ext4_dbg(DEBUG_FS,
+			 DBG_WARN "Block group descriptor checksum failed. "
+			 "Block group index: %ud\n",
+			 bgid);
+	}
+
+	if (ext4_bg_has_flag(bg, EXT4_BLOCK_GROUP_BLOCK_UNINIT)) {
+		rc = ext4_fs_init_block_bitmap(ref);
+		if (rc != 0)
+			goto release;
+
+		ext4_bg_clear_flag(bg, EXT4_BLOCK_GROUP_BLOCK_UNINIT);
+		ref->dirty = true;
+	}
+
+	if (ext4_bg_has_flag(bg, EXT4_BLOCK_GROUP_INODE_UNINIT)) {
+		rc = ext4_fs_init_inode_bitmap(ref);
+		if (rc != 0)
+			goto release;
+
+		ext4_bg_clear_flag(bg, EXT4_BLOCK_GROUP_INODE_UNINIT);
+
+		if (!ext4_bg_has_flag(bg, EXT4_BLOCK_GROUP_ITABLE_ZEROED)) {
+			rc = ext4_fs_init_inode_table(ref);
+			if (rc != 0)
+				goto release;
+
+			ext4_bg_set_flag(bg, EXT4_BLOCK_GROUP_ITABLE_ZEROED);
+		}
+
+		ref->dirty = true;
+	}
+
+	return 0;
+
+release:
+	/* Drop the descriptor block; the initialization error is what the
+	 * caller gets, as before */
+	ext4_block_set(fs->bdev, &ref->block);
+	return rc;
+}

+/**@brief Release a block group reference.
+ *
+ * When the reference is dirty the descriptor checksum is recomputed and
+ * stored, and the descriptor block is marked dirty before it is released.
+ *
+ * @param ref Reference to release
+ * @return Result of ext4_block_set() on the descriptor block
+ */
+int ext4_fs_put_block_group_ref(struct ext4_block_group_ref *ref)
+{
+	u16int csum;
+
+	if (!ref->dirty) {
+		/* Unmodified: just put back the descriptor block */
+		return ext4_block_set(ref->fs->bdev, &ref->block);
+	}
+
+	/* Compute new checksum of block group */
+	csum = ext4_fs_bg_checksum(&ref->fs->sb, ref->index, ref->block_group);
+	ref->block_group->checksum = to_le16(csum);
+
+	/* Mark block dirty for writing changes to physical device */
+	ext4_trans_set_block_dirty(ref->block.buf);
+
+	return ext4_block_set(ref->fs->bdev, &ref->block);
+}

+/**@brief Compute the checksum of an i-node.
+ *
+ * Without the METADATA_CSUM feature the result is 0. Otherwise it is a
+ * crc32c over the filesystem uuid, the little-endian inode number, the
+ * little-endian inode generation and the whole inode (with its checksum
+ * temporarily set to 0). For inodes of EXT4_GOOD_OLD_INODE_SIZE only the
+ * low 16 bits are kept.
+ *
+ * @param inode_ref I-node reference
+ * @return Checksum value
+ */
+static u32int ext4_fs_inode_checksum(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u16int inode_size = ext4_get16(sb, inode_size);
+	u32int le_ino, le_gen;
+	u32int saved_csum;
+	u32int crc;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return 0;
+
+	le_ino = to_le32(inode_ref->index);
+	le_gen = to_le32(ext4_inode_get_generation(inode_ref->inode));
+
+	/* The inode's own checksum must read as 0 while it is hashed */
+	saved_csum = ext4_inode_get_csum(sb, inode_ref->inode);
+	ext4_inode_set_csum(sb, inode_ref->inode, 0);
+
+	/* uuid, then inode number and generation, then the inode itself */
+	crc = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	crc = ext4_crc32c(crc, &le_ino, sizeof(le_ino));
+	crc = ext4_crc32c(crc, &le_gen, sizeof(le_gen));
+	crc = ext4_crc32c(crc, inode_ref->inode, inode_size);
+
+	ext4_inode_set_csum(sb, inode_ref->inode, saved_csum);
+
+	/* If inode size is not large enough to hold the
+	 * upper 16bit of the checksum */
+	if (inode_size == EXT4_GOOD_OLD_INODE_SIZE)
+		crc &= 0xFFFF;
+
+	return crc;
+}

+/**@brief Recompute and store the checksum of an i-node.
+ *        Does nothing without the METADATA_CSUM feature.
+ * @param inode_ref I-node reference
+ */
+static void ext4_fs_set_inode_checksum(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		u32int fresh = ext4_fs_inode_checksum(inode_ref);
+
+		ext4_inode_set_csum(sb, inode_ref->inode, fresh);
+	}
+}

+/**@brief Verify the checksum stored in an i-node.
+ *        Without the METADATA_CSUM feature the i-node is accepted as is.
+ * @param inode_ref I-node reference
+ * @return true when no check is performed or the checksums match
+ */
+static bool ext4_fs_verify_inode_csum(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		return ext4_inode_get_csum(sb, inode_ref->inode) ==
+		       ext4_fs_inode_checksum(inode_ref);
+	}
+
+	return true;
+}

+/**@brief Load the block holding an i-node and build a reference to it.
+ * @param fs          Filesystem
+ * @param index       I-node number (1-based)
+ * @param ref         Reference to fill in; on success it must be released
+ *                    with ext4_fs_put_inode_ref()
+ * @param initialized When true the i-node checksum is verified; a mismatch
+ *                    is only logged
+ * @return 0 on success; -1 with the error string set for i-node number 0;
+ *         otherwise the result of the failing block group or block call
+ */
+static int
+__ext4_fs_get_inode_ref(struct ext4_fs *fs, u32int index,
+			struct ext4_inode_ref *ref,
+			bool initialized)
+{
+	/* Compute number of i-nodes, that fits in one data block */
+	u32int inodes_per_group = ext4_get32(&fs->sb, inodes_per_group);
+
+	/*
+	 * I-node numbers are 1-based. Reject 0 up front: the decrement below
+	 * would wrap to 0xFFFFFFFF and select a bogus block group.
+	 */
+	if (index == 0) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	/*
+	 * Inode numbers are 1-based, but it is simpler to work with 0-based
+	 * when computing indices
+	 */
+	index -= 1;
+	u32int block_group = index / inodes_per_group;
+	u32int offset_in_group = index % inodes_per_group;
+
+	/* Load block group, where i-node is located */
+	struct ext4_block_group_ref bg_ref;
+
+	int rc = ext4_fs_get_block_group_ref(fs, block_group, &bg_ref);
+	if (rc != 0) {
+		return rc;
+	}
+
+	/* Load block address, where i-node table is located */
+	ext4_fsblk_t inode_table_start =
+	    ext4_bg_get_inode_table_first_block(bg_ref.block_group, &fs->sb);
+
+	/* Put back block group reference (not needed more) */
+	rc = ext4_fs_put_block_group_ref(&bg_ref);
+	if (rc != 0) {
+		return rc;
+	}
+
+	/* Compute position of i-node in the block group */
+	u16int inode_size = ext4_get16(&fs->sb, inode_size);
+	u32int block_size = ext4_sb_get_block_size(&fs->sb);
+	u32int byte_offset_in_group = offset_in_group * inode_size;
+
+	/* Compute block address */
+	ext4_fsblk_t block_id =
+	    inode_table_start + (byte_offset_in_group / block_size);
+
+	rc = ext4_trans_block_get(fs->bdev, &ref->block, block_id);
+	if (rc != 0) {
+		return rc;
+	}
+
+	/* Compute position of i-node in the data block */
+	u32int offset_in_block = byte_offset_in_group % block_size;
+	ref->inode = (struct ext4_inode *)(ref->block.data + offset_in_block);
+
+	/* We need to store the original value of index in the reference */
+	ref->index = index + 1;
+	ref->fs = fs;
+	ref->dirty = false;
+
+	if (initialized && !ext4_fs_verify_inode_csum(ref)) {
+		/* The two literals are concatenated: keep a separating space */
+		ext4_dbg(DEBUG_FS,
+			DBG_WARN "Inode checksum failed. "
+			"Inode: %ud\n",
+			ref->index);
+	}
+
+	return 0;
+}

+/**@brief Get a reference to an existing i-node, verifying its checksum.
+ * @param fs    Filesystem
+ * @param index I-node number
+ * @param ref   Reference to fill in
+ * @return Result of __ext4_fs_get_inode_ref()
+ */
+int ext4_fs_get_inode_ref(struct ext4_fs *fs, u32int index,
+			  struct ext4_inode_ref *ref)
+{
+	/* The i-node is expected to hold valid data, so check its checksum */
+	bool initialized = true;
+
+	return __ext4_fs_get_inode_ref(fs, index, ref, initialized);
+}

+/**@brief Release an i-node reference.
+ *
+ * When the reference is dirty the i-node checksum is refreshed and the
+ * block holding the i-node is marked dirty before it is released.
+ *
+ * @param ref Reference to release
+ * @return Result of ext4_block_set() on the i-node's block
+ */
+int ext4_fs_put_inode_ref(struct ext4_inode_ref *ref)
+{
+	if (!ref->dirty) {
+		/* Unmodified: just put back the block */
+		return ext4_block_set(ref->fs->bdev, &ref->block);
+	}
+
+	/* Update the checksum first, then flag the block for write-back */
+	ext4_fs_set_inode_checksum(ref);
+	ext4_trans_set_block_dirty(ref->block.buf);
+
+	return ext4_block_set(ref->fs->bdev, &ref->block);
+}

+/**@brief Prepare the block map of a freshly created i-node.
+ *
+ * Only regular files and directories are touched; any other i-node type is
+ * left unchanged. When the filesystem has the EXTENTS feature the i-node
+ * gets the extents flag and an initialized extent tree root. The reference
+ * is then marked dirty.
+ *
+ * @param fs        Filesystem
+ * @param inode_ref I-node reference
+ */
+void ext4_fs_inode_blocks_init(struct ext4_fs *fs,
+			       struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_inode *inode = inode_ref->inode;
+	u32int kind = ext4_inode_type(&fs->sb, inode);
+
+	if (kind != EXT4_INODE_MODE_FILE && kind != EXT4_INODE_MODE_DIRECTORY)
+		return;
+
+	if (ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) {
+		ext4_inode_set_flag(inode, EXT4_INODE_FLAG_EXTENTS);
+
+		/* Initialize extent root header */
+		ext4_extent_tree_init(inode_ref);
+	}
+
+	inode_ref->dirty = true;
+}

+/**@brief Map a directory entry file type to the matching i-node mode type.
+ * @param filetype One of the EXT4_DE_* file types
+ * @return The corresponding EXT4_INODE_MODE_* value; unknown file types map
+ *         to EXT4_INODE_MODE_FILE
+ */
+u32int ext4_fs_correspond_inode_mode(int filetype)
+{
+	u32int mode;
+
+	switch (filetype) {
+	case EXT4_DE_DIR:
+		mode = EXT4_INODE_MODE_DIRECTORY;
+		break;
+	case EXT4_DE_REG_FILE:
+		mode = EXT4_INODE_MODE_FILE;
+		break;
+	case EXT4_DE_SYMLINK:
+		mode = EXT4_INODE_MODE_SOFTLINK;
+		break;
+	case EXT4_DE_CHRDEV:
+		mode = EXT4_INODE_MODE_CHARDEV;
+		break;
+	case EXT4_DE_BLKDEV:
+		mode = EXT4_INODE_MODE_BLOCKDEV;
+		break;
+	case EXT4_DE_FIFO:
+		mode = EXT4_INODE_MODE_FIFO;
+		break;
+	case EXT4_DE_SOCK:
+		mode = EXT4_INODE_MODE_SOCKET;
+		break;
+	default:
+		/* FIXME: unsupported filetype */
+		mode = EXT4_INODE_MODE_FILE;
+		break;
+	}
+
+	return mode;
+}

+/**@brief Allocate a new i-node and initialize it.
+ *
+ * The i-node is zero-filled, given a default mode for its type (0777 for
+ * directories and symbolic links, 0666 otherwise), and the reference is
+ * marked dirty.
+ *
+ * @param fs        Filesystem
+ * @param inode_ref Reference filled in with the new i-node
+ * @param filetype  One of the EXT4_DE_* file types
+ * @return 0 on success, otherwise the result of the failing allocation or
+ *         i-node load; if the load fails the i-node is freed again
+ */
+int ext4_fs_alloc_inode(struct ext4_fs *fs, struct ext4_inode_ref *inode_ref,
+			int filetype)
+{
+	struct ext4_inode *inode;
+	u16int inode_size = ext4_get16(&fs->sb, inode_size);
+	bool is_dir = (filetype == EXT4_DE_DIR);
+	u32int index;
+	u32int mode;
+	int rc;
+
+	/* Allocate inode by allocation algorithm */
+	rc = ext4_ialloc_alloc_inode(fs, &index, is_dir);
+	if (rc != 0)
+		return rc;
+
+	/* Load i-node from on-disk i-node table; its old contents are not
+	 * checksum-verified since they are overwritten below */
+	rc = __ext4_fs_get_inode_ref(fs, index, inode_ref, false);
+	if (rc != 0) {
+		ext4_ialloc_free_inode(fs, index, is_dir);
+		return rc;
+	}
+
+	/* Initialize i-node */
+	inode = inode_ref->inode;
+	memset(inode, 0, inode_size);
+
+	/*
+	 * Default permissions to be compatible with other systems:
+	 * 0777 (octal) == rwxrwxrwx for directories and symbolic links,
+	 * 0666 (octal) == rw-rw-rw- for everything else
+	 */
+	if (is_dir)
+		mode = 0777 | EXT4_INODE_MODE_DIRECTORY;
+	else if (filetype == EXT4_DE_SYMLINK)
+		mode = 0777 | EXT4_INODE_MODE_SOFTLINK;
+	else
+		mode = 0666 | ext4_fs_correspond_inode_mode(filetype);
+
+	ext4_inode_set_mode(&fs->sb, inode, mode);
+	ext4_inode_set_links_cnt(inode, 0);
+	ext4_inode_set_uid(inode, 0);
+	ext4_inode_set_gid(inode, 0);
+	ext4_inode_set_size(inode, 0);
+	ext4_inode_set_access_time(inode, 0);
+	ext4_inode_set_change_inode_time(inode, 0);
+	ext4_inode_set_modif_time(inode, 0);
+	ext4_inode_set_del_time(inode, 0);
+	ext4_inode_set_blocks_count(&fs->sb, inode, 0);
+	ext4_inode_set_flags(inode, 0);
+	ext4_inode_set_generation(inode, 0);
+
+	/* Larger i-nodes also record the wanted extra size from the superblock */
+	if (inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+		u16int extra = ext4_get16(&fs->sb, want_extra_isize);
+
+		ext4_inode_set_extra_isize(&fs->sb, inode, extra);
+	}
+
+	memset(inode->blocks, 0, sizeof(inode->blocks));
+	inode_ref->dirty = true;
+
+	return 0;
+}

+/**@brief Release the metadata blocks of an i-node and free the i-node.
+ *
+ * For i-nodes that do not use extents, the single, double and triple
+ * indirect blocks (and the indirect blocks they reference) are freed and
+ * the i-node's indirect pointers are cleared. Then, for every i-node, the
+ * extended attribute block is freed if present and the i-node is handed
+ * back to the i-node allocator.
+ *
+ * @param inode_ref I-node to free
+ * @return Result of ext4_ialloc_free_inode(), or the result of the first
+ *         failing block read or block free. On those early returns the
+ *         i-node reference is not marked dirty by this function.
+ */
+int ext4_fs_free_inode(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_fs *fs = inode_ref->fs;
+	u32int offset;
+	u32int suboff;
+	int rc;
+	/* Extent-mapped i-nodes have no indirect blocks to release here */
+	if ((ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) &&
+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {
+		/* Data structures are released during truncate operation... */
+		goto finish;
+	}
+	/* Release all indirect (no data) blocks */
+	/* 1) Single indirect */
+	ext4_fsblk_t fblock = ext4_inode_get_indirect_block(inode_ref->inode, 0);
+	if (fblock != 0) {
+		/* NOTE: this rc shadows the function-level rc */
+		int rc = ext4_balloc_free_block(inode_ref, fblock);
+		if (rc != 0)
+			return rc;
+		ext4_inode_set_indirect_block(inode_ref->inode, 0, 0);
+	}
+	u32int block_size = ext4_sb_get_block_size(&fs->sb);
+	/* Number of 32-bit block addresses held by one indirect block */
+	u32int count = block_size / sizeof(u32int);
+	struct ext4_block block;
+	/* 2) Double indirect */
+	fblock = ext4_inode_get_indirect_block(inode_ref->inode, 1);
+	if (fblock != 0) {
+		/* NOTE: this rc shadows the function-level rc */
+		int rc = ext4_trans_block_get(fs->bdev, &block, fblock);
+		if (rc != 0)
+			return rc;
+		ext4_fsblk_t ind_block;
+		/* Free every indirect block referenced from the double
+		 * indirect block; zero entries are skipped */
+		for (offset = 0; offset < count; ++offset) {
+			ind_block = to_le32(((u32int *)block.data)[offset]);
+			if (ind_block == 0)
+				continue;
+			rc = ext4_balloc_free_block(inode_ref, ind_block);
+			if (rc != 0) {
+				ext4_block_set(fs->bdev, &block);
+				return rc;
+			}
+		}
+		ext4_block_set(fs->bdev, &block);
+		/* Then free the double indirect block itself */
+		rc = ext4_balloc_free_block(inode_ref, fblock);
+		if (rc != 0)
+			return rc;
+		ext4_inode_set_indirect_block(inode_ref->inode, 1, 0);
+	}
+	/* 3) Triple indirect */
+	struct ext4_block subblock;
+	fblock = ext4_inode_get_indirect_block(inode_ref->inode, 2);
+	if (fblock == 0)
+		goto finish;
+	rc = ext4_trans_block_get(fs->bdev, &block, fblock);
+	if (rc != 0)
+		return rc;
+	ext4_fsblk_t ind_block;
+	for (offset = 0; offset < count; ++offset) {
+		ind_block = to_le32(((u32int *)block.data)[offset]);
+		if (ind_block == 0)
+			continue;
+		/* Load the second-level block to free what it references */
+		rc = ext4_trans_block_get(fs->bdev, &subblock,
+				ind_block);
+		if (rc != 0) {
+			ext4_block_set(fs->bdev, &block);
+			return rc;
+		}
+		ext4_fsblk_t ind_subblk;
+		for (suboff = 0; suboff < count; ++suboff) {
+			ind_subblk = to_le32(((u32int *)subblock.data)[suboff]);
+			if (ind_subblk == 0)
+				continue;
+			rc = ext4_balloc_free_block(inode_ref, ind_subblk);
+			if (rc != 0) {
+				ext4_block_set(fs->bdev, &subblock);
+				ext4_block_set(fs->bdev, &block);
+				return rc;
+			}
+		}
+		ext4_block_set(fs->bdev, &subblock);
+		/* Free the second-level block itself */
+		rc = ext4_balloc_free_block(inode_ref,
+				ind_block);
+		if (rc != 0) {
+			ext4_block_set(fs->bdev, &block);
+			return rc;
+		}
+	}
+	ext4_block_set(fs->bdev, &block);
+	/* Finally free the triple indirect block itself */
+	rc = ext4_balloc_free_block(inode_ref, fblock);
+	if (rc != 0)
+		return rc;
+	ext4_inode_set_indirect_block(inode_ref->inode, 2, 0);
+finish:
+	/* Mark inode dirty for writing to the physical device */
+	inode_ref->dirty = true;
+	/* Free block with extended attributes if present */
+	ext4_fsblk_t xattr_block =
+	    ext4_inode_get_file_acl(inode_ref->inode, &fs->sb);
+	if (xattr_block) {
+		/* NOTE: this rc shadows the function-level rc */
+		int rc = ext4_balloc_free_block(inode_ref, xattr_block);
+		if (rc != 0)
+			return rc;
+		ext4_inode_set_file_acl(inode_ref->inode, &fs->sb, 0);
+	}
+	/* Free inode by allocator; the last argument tells it whether the
+	 * i-node is a directory */
+	if (ext4_inode_is_type(&fs->sb, inode_ref->inode,
+			       EXT4_INODE_MODE_DIRECTORY))
+		rc = ext4_ialloc_free_inode(fs, inode_ref->index, true);
+	else
+		rc = ext4_ialloc_free_inode(fs, inode_ref->index, false);
+	return rc;
+}

+/**@brief Release data block from i-node
+ *
+ * Only block-mapped (non-extent) i-nodes are supported; this is asserted.
+ * The reference to the data block is cleared (in the i-node for direct
+ * blocks, in the last-level indirect block otherwise) and the block is then
+ * freed. A missing mapping (sparse file) is not an error.
+ *
+ * @param inode_ref I-node to release block from
+ * @param iblock    Logical block to be released
+ * @return 0 when there is nothing to release; -1 with the error string set
+ *         when iblock is beyond the largest indirect level; otherwise the
+ *         result of the failing block read/write or of
+ *         ext4_balloc_free_block()
+ */
+static int ext4_fs_release_inode_block(struct ext4_inode_ref *inode_ref,
+				ext4_lblk_t iblock)
+{
+	ext4_fsblk_t fblock;
+	struct ext4_fs *fs = inode_ref->fs;
+	/* Extents are handled elsewhere; there is no support for them in this
+	 * function
+	 */
+	assert(!(
+	    ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS) &&
+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))));
+	struct ext4_inode *inode = inode_ref->inode;
+	/* Handle simple case when we are dealing with direct reference */
+	if (iblock < EXT4_INODE_DIRECT_BLOCK_COUNT) {
+		fblock = ext4_inode_get_direct_block(inode, iblock);
+		/* Sparse file */
+		if (fblock == 0)
+			return 0;
+		ext4_inode_set_direct_block(inode, iblock, 0);
+		return ext4_balloc_free_block(inode_ref, fblock);
+	}
+	/* Determine the indirection level needed to get the desired block */
+	unsigned int level = 0;
+	unsigned int i;
+	for (i = 1; i < 4; i++) {
+		if (iblock < fs->inode_block_limits[i]) {
+			level = i;
+			break;
+		}
+	}
+	/* iblock is not below any of the limits: it cannot be mapped */
+	if (level == 0) {
+		werrstr(Eio);
+		return -1;
+	}
+	/* Compute offsets for the topmost level */
+	u32int block_offset_in_level =
+		(u32int)(iblock - fs->inode_block_limits[level - 1]);
+	ext4_fsblk_t current_block =
+	    ext4_inode_get_indirect_block(inode, level - 1);
+	u32int offset_in_block =
+	    (u32int)(block_offset_in_level / fs->inode_blocks_per_level[level - 1]);
+	/*
+	 * Navigate through other levels, until we find the block number
+	 * or find null reference meaning we are dealing with sparse file
+	 */
+	struct ext4_block block;
+	while (level > 0) {
+		/* Sparse check */
+		if (current_block == 0)
+			return 0;
+		int rc = ext4_trans_block_get(fs->bdev, &block, current_block);
+		if (rc != 0)
+			return rc;
+		/* Follow the entry for this level to the next block */
+		current_block =
+		    to_le32(((u32int *)block.data)[offset_in_block]);
+		/* Set zero if physical data block address found */
+		if (level == 1) {
+			((u32int *)block.data)[offset_in_block] = to_le32(0);
+			ext4_trans_set_block_dirty(block.buf);
+		}
+		rc = ext4_block_set(fs->bdev, &block);
+		if (rc != 0)
+			return rc;
+		level--;
+		/*
+		 * If we are on the last level, break here as
+		 * there is no next level to visit
+		 */
+		if (level == 0)
+			break;
+		/* Visit the next level */
+		block_offset_in_level %= fs->inode_blocks_per_level[level];
+		offset_in_block = (u32int)(block_offset_in_level /
+				  fs->inode_blocks_per_level[level - 1]);
+	}
+	/* current_block now holds the data block address read at level 1 */
+	fblock = current_block;
+	if (fblock == 0)
+		return 0;
+	/* Physical block is not referenced, it can be released */
+	return ext4_balloc_free_block(inode_ref, fblock);
+}

+/**@brief Shrink an i-node to a smaller size and release the blocks behind it.

+ *

+ * Growing a file is not supported. Short symbolic links kept inside the

+ * i-node and character/block device or socket i-nodes are handled without

+ * calling the block release routines.

+ * @param inode_ref I-node to truncate

+ * @param new_size  Requested size in bytes; must not exceed the current size

+ * @return 0 on success, -1 with the error string set to Einval when the

+ *         i-node cannot be truncated or new_size is larger than the current

+ *         size, otherwise the error code of the failing helper

+ */

+int ext4_fs_truncate_inode(struct ext4_inode_ref *inode_ref, u64int new_size)

+{

+	struct ext4_sblock *sb = &inode_ref->fs->sb;

+	u64int old_size;

+	u32int type, blk_size, keep_cnt, old_cnt, drop_cnt, n;

+	bool is_symlink;

+	char *tail;

+	int rc;

+	/* Refuse i-nodes whose flags forbid truncation */

+	if (!ext4_inode_can_truncate(sb, inode_ref->inode)) {

+		werrstr(Einval);

+		return -1;

+	}

+	old_size = ext4_inode_get_size(sb, inode_ref->inode);

+	/* Already at the requested size */

+	if (old_size == new_size)

+		return 0;

+	/* Truncation never makes a file larger */

+	if (old_size < new_size) {

+		werrstr(Einval);

+		return -1;

+	}

+	/* Symbolic link small enough to live in the i-node's block array and

+	 * owning no blocks: wipe everything past the new end in place

+	 */

+	is_symlink = ext4_inode_is_type(sb, inode_ref->inode,

+					EXT4_INODE_MODE_SOFTLINK);

+	if (is_symlink && old_size < sizeof(inode_ref->inode->blocks) &&

+	    !ext4_inode_get_blocks_count(sb, inode_ref->inode)) {

+		tail = (char *)inode_ref->inode->blocks + new_size;

+		memset(tail, 0,

+		       sizeof(inode_ref->inode->blocks) - (u32int)new_size);

+		ext4_inode_set_size(inode_ref->inode, new_size);

+		inode_ref->dirty = true;

+		return 0;

+	}

+	/* Device and socket i-nodes: only the first two block slots are

+	 * cleared, the size is left as it is

+	 */

+	type = ext4_inode_type(sb, inode_ref->inode);

+	switch (type) {

+	case EXT4_INODE_MODE_CHARDEV:

+	case EXT4_INODE_MODE_BLOCKDEV:

+	case EXT4_INODE_MODE_SOCKET:

+		inode_ref->inode->blocks[0] = 0;

+		inode_ref->inode->blocks[1] = 0;

+		inode_ref->dirty = true;

+		return 0;

+	}

+	/* Number of blocks to keep, number currently covered, difference */

+	blk_size = ext4_sb_get_block_size(sb);

+	keep_cnt = (u32int)((new_size + blk_size - 1) / blk_size);

+	old_cnt = (u32int)((old_size + blk_size - 1) / blk_size);

+	drop_cnt = old_cnt - keep_cnt;

+	if ((ext4_sb_feature_incom(sb, EXT4_FINCOM_EXTENTS)) &&

+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {

+		/* Extent-mapped file: drop everything from keep_cnt onwards */

+		if (drop_cnt != 0) {

+			rc = ext4_extent_remove_space(inode_ref, keep_cnt,

+						      EXT4_EXTENT_MAX_BLOCKS);

+			if (rc != 0)

+				return rc;

+		}

+	} else {

+		/* Block-mapped file: release logical blocks

+		 * keep_cnt .. keep_cnt + drop_cnt - 1 one at a time

+		 */

+		for (n = 0; n < drop_cnt; n++) {

+			rc = ext4_fs_release_inode_block(inode_ref,

+							 keep_cnt + n);

+			if (rc != 0)

+				return rc;

+		}

+	}

+	/* Record the new size in the i-node */

+	ext4_inode_set_size(inode_ref->inode, new_size);

+	inode_ref->dirty = true;

+	return 0;

+}

+/**@brief Compute 'goal' for inode index

+ *

+ * The value returned is the zero-based i-node number divided by the

+ * superblock's inodes_per_group field.

+ * @param inode_ref Reference to inode, to allocate block for

+ * @return goal

+ */

+ext4_fsblk_t ext4_fs_inode_to_goal_block(struct ext4_inode_ref *inode_ref)

+{

+	struct ext4_sblock *sb = &inode_ref->fs->sb;

+	/* I-node numbers start at 1, hence the "- 1" */

+	return (inode_ref->index - 1) / ext4_get32(sb, inodes_per_group);

+}

+/**@brief Compute 'goal' for allocation algorithm (For blockmap).

+ *

+ * If the last logical block of the i-node is mapped, the goal is the block

+ * right after it. Otherwise the goal is the first block following the i-node

+ * table of the block group the i-node belongs to.

+ * @param inode_ref Reference to inode, to allocate block for

+ * @param goal      Output: suggested physical block (set to 0 first)

+ * @return 0 on success, otherwise the error code of the failing helper

+ */

+int ext4_fs_indirect_find_goal(struct ext4_inode_ref *inode_ref,

+			       ext4_fsblk_t *goal)

+{

+	struct ext4_sblock *sb = &inode_ref->fs->sb;

+	struct ext4_block_group_ref bg_ref;

+	struct ext4_bgroup *bg;

+	ext4_fsblk_t itab_start, itab_blocks;

+	u64int size;

+	u32int blk_size, nblocks, per_group, group, ngroups, itab_bytes;

+	u16int entry_size;

+	int rc;

+	*goal = 0;

+	/* Number of logical blocks covered by the current size, rounded up */

+	size = ext4_inode_get_size(sb, inode_ref->inode);

+	blk_size = ext4_sb_get_block_size(sb);

+	nblocks = (u32int)(size / blk_size);

+	if (size % blk_size != 0)

+		nblocks++;

+	/* Prefer the block right behind the last mapped one */

+	if (nblocks > 0) {

+		rc = ext4_fs_get_inode_dblk_idx(inode_ref, nblocks - 1,

+						goal, false);

+		if (rc != 0)

+			return rc;

+		if (*goal != 0) {

+			(*goal)++;

+			return rc;

+		}

+		/* Last block is a hole: fall through to the group-based goal */

+	}

+	/* Block group that holds the i-node */

+	per_group = ext4_get32(sb, inodes_per_group);

+	group = (inode_ref->index - 1) / per_group;

+	blk_size = ext4_sb_get_block_size(sb);

+	rc = ext4_fs_get_block_group_ref(inode_ref->fs, group, &bg_ref);

+	if (rc != 0)

+		return rc;

+	bg = bg_ref.block_group;

+	ngroups = ext4_block_group_cnt(sb);

+	itab_start = ext4_bg_get_inode_table_first_block(bg, sb);

+	entry_size = ext4_get16(sb, inode_size);

+	/* Size of this group's i-node table in bytes; the last group may

+	 * hold fewer i-nodes than the others

+	 */

+	if (group < ngroups - 1) {

+		itab_bytes = per_group * entry_size;

+	} else {

+		itab_bytes = ext4_get32(sb, inodes_count) -

+			     ((ngroups - 1) * per_group);

+		itab_bytes *= entry_size;

+	}

+	/* Table length in blocks, rounded up */

+	itab_blocks = itab_bytes / blk_size;

+	if (itab_bytes % blk_size)

+		itab_blocks++;

+	*goal = itab_start + itab_blocks;

+	return ext4_fs_put_block_group_ref(&bg_ref);

+}

+/**@brief Translate a logical block of an i-node into a physical block number.

+ *

+ * Extent-mapped i-nodes are resolved through ext4_extent_get_blocks();

+ * block-mapped i-nodes are resolved by reading the direct slots or walking

+ * the indirect blocks. Indirect blocks are only read, never modified.

+ * @param inode_ref         I-node to look the block up in

+ * @param iblock            Logical block number

+ * @param fblock            Output: physical block number, 0 when the file is

+ *                          empty or the block is a hole

+ * @param extent_create     Passed through to ext4_extent_get_blocks() for

+ *                          extent-mapped i-nodes; not used otherwise

+ * @param support_unwritten Not used by this function (only marked USED)

+ * @return 0 on success, -1 with the error string set to Eio when iblock lies

+ *         beyond every indirection limit, otherwise the error code of the

+ *         failing helper

+ */

+static int ext4_fs_get_inode_dblk_idx_internal(struct ext4_inode_ref *inode_ref,

+				       ext4_lblk_t iblock, ext4_fsblk_t *fblock,

+				       bool extent_create,

+				       bool support_unwritten)

+{

+	struct ext4_fs *fs = inode_ref->fs;

+	/* An empty file has no blocks at all */

+	if (ext4_inode_get_size(&fs->sb, inode_ref->inode) == 0) {

+		*fblock = 0;

+		return 0;

+	}

+	ext4_fsblk_t current_block;

+	USED(extent_create);

+	USED(support_unwritten);

+	/* Handle i-node using extents */

+	if ((ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) &&

+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {

+		ext4_fsblk_t current_fsblk;

+		int rc = ext4_extent_get_blocks(inode_ref, iblock, 1,

+				&current_fsblk, extent_create, nil);

+		if (rc != 0)

+			return rc;

+		current_block = current_fsblk;

+		*fblock = current_block;

+		return 0;

+	}

+	struct ext4_inode *inode = inode_ref->inode;

+	/* Direct blocks are read directly from the array in the i-node */

+	if (iblock < EXT4_INODE_DIRECT_BLOCK_COUNT) {

+		current_block =

+		    ext4_inode_get_direct_block(inode, (u32int)iblock);

+		*fblock = current_block;

+		return 0;

+	}

+	/* Determine indirection level of the target block: the first level

+	 * (1..3) whose limit is above iblock

+	 */

+	unsigned int l = 0;

+	unsigned int i;

+	for (i = 1; i < 4; i++) {

+		if (iblock < fs->inode_block_limits[i]) {

+			l = i;

+			break;

+		}

+	}

+	/* No level covers iblock */

+	if (l == 0) {

+		werrstr(Eio);

+		return -1;

+	}

+	/* Compute offsets for the topmost level */

+	u32int blk_off_in_lvl = (u32int)(iblock - fs->inode_block_limits[l - 1]);

+	current_block = ext4_inode_get_indirect_block(inode, l - 1);

+	u32int off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);

+	/* Sparse file: the top-level indirect block was never allocated */

+	if (current_block == 0) {

+		*fblock = 0;

+		return 0;

+	}

+	struct ext4_block block;

+	/*

+	 * Navigate through other levels, until we find the block number

+	 * or find null reference meaning we are dealing with sparse file

+	 */

+	while (l > 0) {

+		/* Load indirect block */

+		int rc = ext4_trans_block_get(fs->bdev, &block, current_block);

+		if (rc != 0)

+			return rc;

+		/* Read block address from indirect block */

+		current_block =

+		    to_le32(((u32int *)block.data)[off_in_blk]);

+		/* Put back indirect block untouched */

+		rc = ext4_block_set(fs->bdev, &block);

+		if (rc != 0)

+			return rc;

+		/* Check for sparse file */

+		if (current_block == 0) {

+			*fblock = 0;

+			return 0;

+		}

+		/* Jump to the next level */

+		l--;

+		/* Termination condition - we have address of data block loaded

+		 */

+		if (l == 0)

+			break;

+		/* Visit the next level */

+		blk_off_in_lvl %= fs->inode_blocks_per_level[l];

+		off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);

+	}

+	*fblock = current_block;

+	return 0;

+}

+/**@brief Look up the physical block behind a logical block of an i-node.

+ *

+ * Thin wrapper around ext4_fs_get_inode_dblk_idx_internal() with

+ * extent_create set to false.

+ * @param inode_ref         I-node to look the block up in

+ * @param iblock            Logical block number

+ * @param fblock            Output: physical block number

+ * @param support_unwritten Forwarded unchanged to the internal routine

+ * @return Result of ext4_fs_get_inode_dblk_idx_internal()

+ */

+int ext4_fs_get_inode_dblk_idx(struct ext4_inode_ref *inode_ref,

+			       ext4_lblk_t iblock, ext4_fsblk_t *fblock,

+			       bool support_unwritten)

+{

+	int rc;

+	rc = ext4_fs_get_inode_dblk_idx_internal(inode_ref, iblock, fblock,

+						 false, support_unwritten);

+	return rc;

+}

+/**@brief Look up a logical block of an i-node with extent_create enabled.

+ *

+ * Thin wrapper around ext4_fs_get_inode_dblk_idx_internal() that passes

+ * true for both extent_create and support_unwritten.

+ * @param inode_ref I-node to look the block up in

+ * @param iblock    Logical block number

+ * @param fblock    Output: physical block number

+ * @return Result of ext4_fs_get_inode_dblk_idx_internal()

+ */

+int ext4_fs_init_inode_dblk_idx(struct ext4_inode_ref *inode_ref,

+				ext4_lblk_t iblock, ext4_fsblk_t *fblock)

+{

+	int rc;

+	rc = ext4_fs_get_inode_dblk_idx_internal(inode_ref, iblock, fblock,

+						 true, true);

+	return rc;

+}

+/**@brief Record the physical block behind a logical block of a block-mapped

+ *        i-node.

+ *

+ * For direct blocks the i-node slot is written. For indirect blocks the

+ * chain of indirect blocks is walked, allocating and zero-filling every

+ * missing indirect block on the way, and the address is written into the

+ * last-level indirect block. Block numbers are stored as 32-bit values

+ * (fblock and newly allocated blocks are cast to u32int).

+ * @param inode_ref I-node to update; must not use extents

+ * @param iblock    Logical block number

+ * @param fblock    Physical block number to store

+ * @return 0 on success, -1 with an error string set for extent-mapped

+ *         i-nodes or when iblock lies beyond every indirection limit,

+ *         otherwise the error code of the failing helper

+ */

+static int ext4_fs_set_inode_data_block_index(struct ext4_inode_ref *inode_ref,

+				       ext4_lblk_t iblock, ext4_fsblk_t fblock)

+{

+	struct ext4_fs *fs = inode_ref->fs;

+	/* Handle inode using extents */

+	if ((ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) &&

+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {

+		/* Not reachable */

+		werrstr("impossible feature combination in extents");

+		return -1;

+	}

+	/* Handle simple case when we are dealing with direct reference */

+	if (iblock < EXT4_INODE_DIRECT_BLOCK_COUNT) {

+		ext4_inode_set_direct_block(inode_ref->inode, (u32int)iblock,

+					    (u32int)fblock);

+		inode_ref->dirty = true;

+		return 0;

+	}

+	/* Determine the indirection level needed to get the desired block:

+	 * the first level (1..3) whose limit is above iblock

+	 */

+	unsigned int l = 0;

+	unsigned int i;

+	for (i = 1; i < 4; i++) {

+		if (iblock < fs->inode_block_limits[i]) {

+			l = i;

+			break;

+		}

+	}

+	/* No level covers iblock */

+	if (l == 0) {

+		werrstr(Eio);

+		return -1;

+	}

+	u32int block_size = ext4_sb_get_block_size(&fs->sb);

+	/* Compute offsets for the topmost level */

+	u32int blk_off_in_lvl = (u32int)(iblock - fs->inode_block_limits[l - 1]);

+	ext4_fsblk_t current_block =

+			ext4_inode_get_indirect_block(inode_ref->inode, l - 1);

+	u32int off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);

+	ext4_fsblk_t new_blk;

+	struct ext4_block block;

+	struct ext4_block new_block;

+	/* The top-level indirect block referenced from the i-node is missing

+	 * and has to be allocated

+	 */

+	if (current_block == 0) {

+		/* Allocate new indirect block */

+		ext4_fsblk_t goal;

+		int rc = ext4_fs_indirect_find_goal(inode_ref, &goal);

+		if (rc != 0)

+			return rc;

+		rc = ext4_balloc_alloc_block(inode_ref, goal, &new_blk);

+		if (rc != 0)

+			return rc;

+		/* Update i-node */

+		ext4_inode_set_indirect_block(inode_ref->inode, l - 1,

+				(u32int)new_blk);

+		inode_ref->dirty = true;

+		/* Load newly allocated block */

+		rc = ext4_trans_block_get_noread(fs->bdev, &new_block, new_blk);

+		if (rc != 0) {

+			/* NOTE(review): the block is freed here, but the

+			 * i-node slot written above still holds its number -

+			 * confirm that callers discard the i-node on error

+			 */

+			ext4_balloc_free_block(inode_ref, new_blk);

+			return rc;

+		}

+		/* Initialize new block */

+		memset(new_block.data, 0, block_size);

+		ext4_trans_set_block_dirty(new_block.buf);

+		/* Put back the allocated block */

+		rc = ext4_block_set(fs->bdev, &new_block);

+		if (rc != 0)

+			return rc;

+		current_block = new_blk;

+	}

+	/*

+	 * Navigate through the remaining levels, allocating missing

+	 * indirect blocks, until the last level is reached and the

+	 * data block address can be written

+	 */

+	while (l > 0) {

+		int rc = ext4_trans_block_get(fs->bdev, &block, current_block);

+		if (rc != 0)

+			return rc;

+		current_block = to_le32(((u32int *)block.data)[off_in_blk]);

+		/* A lower-level indirect block is missing: allocate it.

+		 * NOTE(review): on the error paths below that follow a

+		 * successful allocation, new_blk is not freed - confirm

+		 * whether this is cleaned up elsewhere

+		 */

+		if ((l > 1) && (current_block == 0)) {

+			ext4_fsblk_t goal;

+			rc = ext4_fs_indirect_find_goal(inode_ref, &goal);

+			if (rc != 0) {

+				ext4_block_set(fs->bdev, &block);

+				return rc;

+			}

+			/* Allocate new block */

+			rc =

+			    ext4_balloc_alloc_block(inode_ref, goal, &new_blk);

+			if (rc != 0) {

+				ext4_block_set(fs->bdev, &block);

+				return rc;

+			}

+			/* Load newly allocated block */

+			rc = ext4_trans_block_get_noread(fs->bdev, &new_block,

+					    new_blk);

+			if (rc != 0) {

+				ext4_block_set(fs->bdev, &block);

+				return rc;

+			}

+			/* Initialize allocated block */

+			memset(new_block.data, 0, block_size);

+			ext4_trans_set_block_dirty(new_block.buf);

+			rc = ext4_block_set(fs->bdev, &new_block);

+			if (rc != 0) {

+				ext4_block_set(fs->bdev, &block);

+				return rc;

+			}

+			/* Write block address to the parent */

+			u32int * p = (u32int * )block.data;

+			p[off_in_blk] = to_le32((u32int)new_blk);

+			ext4_trans_set_block_dirty(block.buf);

+			current_block = new_blk;

+		}

+		/* Last level reached: write the fblock address */

+		if (l == 1) {

+			u32int * p = (u32int * )block.data;

+			p[off_in_blk] = to_le32((u32int)fblock);

+			ext4_trans_set_block_dirty(block.buf);

+		}

+		rc = ext4_block_set(fs->bdev, &block);

+		if (rc != 0)

+			return rc;

+		l--;

+		/*

+		 * If we are on the last level, break here as

+		 * there is no next level to visit

+		 */

+		if (l == 0)

+			break;

+		/* Visit the next level */

+		blk_off_in_lvl %= fs->inode_blocks_per_level[l];

+		off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);

+	}

+	return 0;

+}

+/**@brief Append one data block to the end of an i-node.

+ *

+ * Extent-mapped i-nodes obtain the block from ext4_extent_get_blocks().

+ * Block-mapped i-nodes allocate a block near the goal computed by

+ * ext4_fs_indirect_find_goal() and link it in. In both cases the i-node size

+ * grows by one block size and the i-node is marked dirty.

+ * @param inode_ref I-node to extend

+ * @param fblock    Output: physical number of the new block

+ * @param iblock    Output: logical number of the new block

+ * @return 0 on success, otherwise the error code of the failing helper

+ */

+int ext4_fs_append_inode_dblk(struct ext4_inode_ref *inode_ref,

+			      ext4_fsblk_t *fblock, ext4_lblk_t *iblock)

+{

+	struct ext4_sblock *sb;

+	ext4_fsblk_t ext_blk, goal, new_blk;

+	u64int size, rem;

+	u32int blk_size, next_idx;

+	int rc;

+	/* Extent-mapped i-nodes take a separate path */

+	if ((ext4_sb_feature_incom(&inode_ref->fs->sb, EXT4_FINCOM_EXTENTS)) &&

+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {

+		sb = &inode_ref->fs->sb;

+		size = ext4_inode_get_size(sb, inode_ref->inode);

+		blk_size = ext4_sb_get_block_size(sb);

+		/* First logical block past the current size, rounded up */

+		*iblock = (u32int)((size + blk_size - 1) / blk_size);

+		rc = ext4_extent_get_blocks(inode_ref, *iblock, 1,

+					    &ext_blk, true, nil);

+		if (rc != 0)

+			return rc;

+		*fblock = ext_blk;

+		assert(*fblock);

+		ext4_inode_set_size(inode_ref->inode, size + blk_size);

+		inode_ref->dirty = true;

+		return rc;

+	}

+	sb = &inode_ref->fs->sb;

+	size = ext4_inode_get_size(sb, inode_ref->inode);

+	blk_size = ext4_sb_get_block_size(sb);

+	/* Round the size up to a whole number of blocks */

+	rem = size % blk_size;

+	if (rem != 0)

+		size += blk_size - rem;

+	/* Logical blocks are numbered from 0 */

+	next_idx = (u32int)(size / blk_size);

+	/* Pick a goal and allocate the physical block */

+	rc = ext4_fs_indirect_find_goal(inode_ref, &goal);

+	if (rc != 0)

+		return rc;

+	rc = ext4_balloc_alloc_block(inode_ref, goal, &new_blk);

+	if (rc != 0)

+		return rc;

+	/* Link the block into the i-node; give it back if that fails */

+	rc = ext4_fs_set_inode_data_block_index(inode_ref, next_idx, new_blk);

+	if (rc != 0) {

+		ext4_balloc_free_block(inode_ref, new_blk);

+		return rc;

+	}

+	/* Account for the new block in the i-node size */

+	ext4_inode_set_size(inode_ref->inode, size + blk_size);

+	inode_ref->dirty = true;

+	*fblock = new_blk;

+	*iblock = next_idx;

+	return 0;

+}

+/**@brief Increment the link count of an i-node.

+ *

+ * When the filesystem has the DIR_INDEX feature and the i-node carries the

+ * INDEX flag, a resulting count of 2 or of at least EXT4_LINK_MAX is replaced

+ * by 1 and EXT4_FRO_COM_DIR_NLINK is set in the superblock's read-only

+ * feature mask.

+ * @param inode_ref I-node whose link count is incremented

+ */

+void ext4_fs_inode_links_count_inc(struct ext4_inode_ref *inode_ref)

+{

+	struct ext4_sblock *sb = &inode_ref->fs->sb;

+	u16int nlink;

+	u32int ro_features;

+	nlink = ext4_inode_get_links_cnt(inode_ref->inode);

+	nlink++;

+	ext4_inode_set_links_cnt(inode_ref->inode, nlink);

+	/* Everything below applies to indexed directories only */

+	if (!ext4_sb_feature_com(sb, EXT4_FCOM_DIR_INDEX))

+		return;

+	if (!ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_INDEX))

+		return;

+	if (nlink <= 1)

+		return;

+	if (nlink < EXT4_LINK_MAX && nlink != 2)

+		return;

+	/* Pin the count at 1 and flag the filesystem accordingly */

+	ext4_inode_set_links_cnt(inode_ref->inode, 1);

+	ro_features = ext4_get32(sb, features_read_only);

+	ro_features |= EXT4_FRO_COM_DIR_NLINK;

+	ext4_set32(sb, features_read_only, ro_features);

+}

+/**@brief Decrement the link count of an i-node.

+ *

+ * Non-directories are decremented while the count is above 0; directories

+ * only while the count is above 2.

+ * @param inode_ref I-node whose link count is decremented

+ */

+void ext4_fs_inode_links_count_dec(struct ext4_inode_ref *inode_ref)

+{

+	u16int nlink, floor;

+	bool is_dir;

+	nlink = ext4_inode_get_links_cnt(inode_ref->inode);

+	is_dir = ext4_inode_is_type(&inode_ref->fs->sb, inode_ref->inode,

+				    EXT4_INODE_MODE_DIRECTORY);

+	/* Lowest count that is left untouched */

+	floor = is_dir ? 2 : 0;

+	if (nlink > floor)

+		ext4_inode_set_links_cnt(inode_ref->inode, nlink - 1);

+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_hash.c

@@ -1,0 +1,313 @@

+/*

+ * Copyright (c) 2013 Grzegorz Kostka ([email protected])

+ *

+ * FreeBSD:

+ * Copyright (c) 2010, 2013 Zheng Liu <[email protected]>

+ * Copyright (c) 2012, Vyacheslav Matyushin

+ * All rights reserved.

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ * 1. Redistributions of source code must retain the above copyright

+ *    notice, this list of conditions and the following disclaimer.

+ * 2. Redistributions in binary form must reproduce the above copyright

+ *    notice, this list of conditions and the following disclaimer in the

+ *    documentation and/or other materials provided with the distribution.

+ *

+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

+ * SUCH DAMAGE.

+ *

+ */

+/*

+ * The following notice applies to the code in ext2_half_md4():

+ *

+ * Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved.

+ *

+ * License to copy and use this software is granted provided that it

+ * is identified as the "RSA Data Security, Inc. MD4 Message-Digest

+ * Algorithm" in all material mentioning or referencing this software

+ * or this function.

+ *

+ * License is also granted to make and use derivative works provided

+ * that such works are identified as "derived from the RSA Data

+ * Security, Inc. MD4 Message-Digest Algorithm" in all material

+ * mentioning or referencing the derived work.

+ *

+ * RSA Data Security, Inc. makes no representations concerning either

+ * the merchantability of this software or the suitability of this

+ * software for any particular purpose. It is provided "as is"

+ * without express or implied warranty of any kind.

+ *

+ * These notices must be retained in any copies of any part of this

+ * documentation and/or software.

+ */

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+/* F, G, and H are MD4 functions */

+#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))

+#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))

+#define H(x, y, z) ((x) ^ (y) ^ (z))

+/* ROTATE_LEFT rotates x left n bits */

+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

+/*

+ * FF, GG, and HH are transformations for rounds 1, 2, and 3.

+ * Rotation is separated from addition to prevent recomputation.

+ */

+#define FF(a, b, c, d, x, s)                                                   \

+	{                                                                      \

+		(a) += F((b), (c), (d)) + (x);                                 \

+		(a) = ROTATE_LEFT((a), (s));                                   \

+	\

+}

+#define GG(a, b, c, d, x, s)                                                   \

+	{                                                                      \

+		(a) += G((b), (c), (d)) + (x) + (u32int)0x5A827999;          \

+		(a) = ROTATE_LEFT((a), (s));                                   \

+	\

+}

+#define HH(a, b, c, d, x, s)                                                   \

+	{                                                                      \

+		(a) += H((b), (c), (d)) + (x) + (u32int)0x6ED9EBA1;          \

+		(a) = ROTATE_LEFT((a), (s));                                   \

+	\

+}

+/*

+ * MD4 basic transformation.  It transforms state based on block.

+ *

+ * This is a half md4 algorithm since Linux uses this algorithm for dir

+ * index.  This function is derived from the RSA Data Security, Inc. MD4

+ * Message-Digest Algorithm and was modified as necessary.

+ *

+ * The return value of this function is u32int in Linux, but actually we don't

+ * need to check this value, so in our version this function doesn't return any

+ * value.

+ *

+ * hash: four-word state; read on entry and incremented by the result of the

+ *       three rounds on exit.

+ * data: eight input words; each round below uses all eight, in a different

+ *       order per round.

+ */

+static void ext2_half_md4(u32int hash[4], u32int data[8])

+{

+	u32int a = hash[0], b = hash[1], c = hash[2], d = hash[3];

+	/* Round 1 */

+	FF(a, b, c, d, data[0], 3);

+	FF(d, a, b, c, data[1], 7);

+	FF(c, d, a, b, data[2], 11);

+	FF(b, c, d, a, data[3], 19);

+	FF(a, b, c, d, data[4], 3);

+	FF(d, a, b, c, data[5], 7);

+	FF(c, d, a, b, data[6], 11);

+	FF(b, c, d, a, data[7], 19);

+	/* Round 2 */

+	GG(a, b, c, d, data[1], 3);

+	GG(d, a, b, c, data[3], 5);

+	GG(c, d, a, b, data[5], 9);

+	GG(b, c, d, a, data[7], 13);

+	GG(a, b, c, d, data[0], 3);

+	GG(d, a, b, c, data[2], 5);

+	GG(c, d, a, b, data[4], 9);

+	GG(b, c, d, a, data[6], 13);

+	/* Round 3 */

+	HH(a, b, c, d, data[3], 3);

+	HH(d, a, b, c, data[7], 9);

+	HH(c, d, a, b, data[2], 11);

+	HH(b, c, d, a, data[6], 15);

+	HH(a, b, c, d, data[1], 3);

+	HH(d, a, b, c, data[5], 9);

+	HH(c, d, a, b, data[0], 11);

+	HH(b, c, d, a, data[4], 15);

+	/* Fold the working registers back into the caller's state */

+	hash[0] += a;

+	hash[1] += b;

+	hash[2] += c;

+	hash[3] += d;

+}

+/*

+ * Tiny Encryption Algorithm.

+ *

+ * Runs 16 rounds over the first two words of hash, keyed by the first four

+ * words of data, and adds the result back into hash[0] and hash[1].

+ */

+static void ext2_tea(u32int hash[4], u32int data[8])

+{

+	u32int delta = 0x9E3779B9;

+	u32int v0 = hash[0], v1 = hash[1];

+	u32int sum;

+	int round;

+	for (round = 1; round <= 16; round++) {

+		/* Round constant: delta times the 1-based round number */

+		sum = round * delta;

+		v0 += ((v1 << 4) + data[0]) ^ (v1 + sum) ^ ((v1 >> 5) + data[1]);

+		v1 += ((v0 << 4) + data[2]) ^ (v0 + sum) ^ ((v0 >> 5) + data[3]);

+	}

+	hash[0] += v0;

+	hash[1] += v1;

+}

+/*

+ * Legacy directory hash.

+ *

+ * Folds the first len bytes of name into a running pair of words. Bytes are

+ * read as unsigned when unsigned_char is non-zero and as signed otherwise.

+ * The returned value is the final word shifted left by one.

+ */

+static u32int ext2_legacy_hash(const char *name, int len, int unsigned_char)

+{

+	const unsigned char *ubytes = (const unsigned char *)name;

+	const signed char *sbytes = (const signed char *)name;

+	u32int older = 0x37ABE8F9, newer = 0x12A3FE2D, mixed;

+	u32int factor = 0x6D22F5;

+	int ch, pos;

+	for (pos = 0; pos < len; pos++) {

+		if (unsigned_char)

+			ch = (unsigned int)ubytes[pos];

+		else

+			ch = (int)sbytes[pos];

+		mixed = older + (newer ^ (ch * factor));

+		/* Fold the top bit back in */

+		if (mixed & 0x80000000)

+			mixed -= 0x7FFFFFFF;

+		older = newer;

+		newer = mixed;

+	}

+	return (newer << 1);

+}

+/*

+ * Pack up to dlen bytes of src into 32-bit words in dst.

+ *

+ * Every word starts out as the padding pattern (the low byte of slen

+ * repeated; slen itself when it fits in a byte) and has source bytes

+ * shifted in from the right, four per word. Once the source bytes are used

+ * up, the partially filled word is stored if there is room, and the rest of

+ * the dlen-byte destination is filled with the padding pattern. Bytes are

+ * read as unsigned when unsigned_char is non-zero and as signed otherwise.

+ */

+static void ext2_prep_hashbuf(const char *src, u32int slen, u32int *dst,

+			      int dlen, int unsigned_char)

+{

+	const unsigned char *ubytes = (const unsigned char *)src;

+	const signed char *sbytes = (const signed char *)src;

+	u32int pad = slen | (slen << 8) | (slen << 16) | (slen << 24);

+	u32int word = pad;

+	int nbytes, pos, byte;

+	/* Never consume more source bytes than the destination can hold */

+	nbytes = slen;

+	if (slen > (u32int)dlen)

+		nbytes = dlen;

+	for (pos = 0; pos < nbytes; pos++) {

+		if (unsigned_char)

+			byte = (unsigned int)ubytes[pos];

+		else

+			byte = (int)sbytes[pos];

+		/* First byte of a word: restart from the padding pattern */

+		if ((pos % 4) == 0)

+			word = pad;

+		word <<= 8;

+		word += byte;

+		/* Fourth byte of a word: emit it */

+		if ((pos % 4) == 3) {

+			*dst++ = word;

+			dlen -= sizeof(u32int);

+			word = pad;

+		}

+	}

+	/* Store the current (possibly partial) word if there is room */

+	dlen -= sizeof(u32int);

+	if (dlen >= 0)

+		*dst++ = word;

+	/* Fill whatever space remains with the padding pattern */

+	for (dlen -= sizeof(u32int); dlen >= 0; dlen -= sizeof(u32int))

+		*dst++ = pad;

+}

+/*

+ * Compute the directory-index hash of a name.

+ *

+ * name:         bytes to hash; must not be nil

+ * len:          number of bytes in name; must be in 1..255

+ * hash_seed:    optional four-word seed replacing the default initial state

+ * hash_version: one of the EXT2_HTREE_* algorithm selectors

+ * hash_major:   output, major hash with the lowest bit cleared; must not be

+ *               nil

+ * hash_minor:   optional output, minor hash (0 for the legacy algorithm)

+ *

+ * Returns 0 on success. Returns -1 with the error string set on failure:

+ * nil name/hash_major, name length out of range, or unknown hash version.

+ * On the last two failures the outputs are zeroed.

+ */

+int ext2_htree_hash(const char *name, int len, const u32int *hash_seed,

+		    int hash_version, u32int *hash_major,

+		    u32int *hash_minor)

+{

+	u32int hash[4];

+	u32int data[8];

+	u32int major, minor = 0;

+	int unsigned_char = 0;

+	if (!name || !hash_major) {

+		werrstr("htree hash: nil name or result pointer");

+		return (-1);

+	}

+	if (len < 1 || len > 255) {

+		/* Report the real cause instead of blaming the hash version */

+		werrstr("invalid name length for hash: %d", len);

+		goto error;

+	}

+	/* Default initial state, overridden by the caller's seed if given */

+	hash[0] = 0x67452301;

+	hash[1] = 0xEFCDAB89;

+	hash[2] = 0x98BADCFE;

+	hash[3] = 0x10325476;

+	if (hash_seed)

+		memcpy(hash, hash_seed, sizeof(hash));

+	switch (hash_version) {

+	case EXT2_HTREE_TEA_UNSIGNED:

+		unsigned_char = 1;

+		/* FALLTHRU */

+	case EXT2_HTREE_TEA:

+		/* TEA consumes the name 16 bytes at a time */

+		while (len > 0) {

+			ext2_prep_hashbuf(name, len, data, 16, unsigned_char);

+			ext2_tea(hash, data);

+			len -= 16;

+			name += 16;

+		}

+		major = hash[0];

+		minor = hash[1];

+		break;

+	case EXT2_HTREE_LEGACY_UNSIGNED:

+		unsigned_char = 1;

+		/* FALLTHRU */

+	case EXT2_HTREE_LEGACY:

+		major = ext2_legacy_hash(name, len, unsigned_char);

+		break;

+	case EXT2_HTREE_HALF_MD4_UNSIGNED:

+		unsigned_char = 1;

+		/* FALLTHRU */

+	case EXT2_HTREE_HALF_MD4:

+		/* Half-MD4 consumes the name 32 bytes at a time */

+		while (len > 0) {

+			ext2_prep_hashbuf(name, len, data, 32, unsigned_char);

+			ext2_half_md4(hash, data);

+			len -= 32;

+			name += 32;

+		}

+		major = hash[1];

+		minor = hash[2];

+		break;

+	default:

+		werrstr("unsupported hash version: %d", hash_version);

+		goto error;

+	}

+	/* The lowest bit of the major hash is always cleared */

+	major &= ~1;

+	/* Never hand out the value reserved as the end-of-file marker */

+	if (major == (EXT2_HTREE_EOF << 1))

+		major = (EXT2_HTREE_EOF - 1) << 1;

+	*hash_major = major;

+	if (hash_minor)

+		*hash_minor = minor;

+	return 0;

+error:

+	/* The error string has already been set by the failing check */

+	*hash_major = 0;

+	if (hash_minor)

+		*hash_minor = 0;

+	return -1;

+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_ialloc.c

@@ -1,0 +1,313 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_trans.h"

+#include "ext4_ialloc.h"

+#include "ext4_super.h"

+#include "ext4_crc32.h"

+#include "ext4_fs.h"

+#include "ext4_blockdev.h"

+#include "ext4_block_group.h"

+#include "ext4_bitmap.h"

+/**@brief  Convert i-node number to relative index in block group.

+ * @param sb    Superblock

+ * @param inode I-node number to be converted (numbering starts at 1)

+ * @return Zero-based index of the i-node in the block group

+ */

+static u32int ext4_ialloc_inode_to_bgidx(struct ext4_sblock *sb,

+					   u32int inode)

+{

+	u32int per_group = ext4_get32(sb, inodes_per_group);

+	u32int zero_based = inode - 1;

+	return zero_based % per_group;

+}

+/**@brief Convert relative index of i-node to absolute i-node number.

+ * @param sb    Superblock

+ * @param index Zero-based index inside the block group

+ * @param bgid  Block group number

+ * @return Absolute number of the i-node (numbering starts at 1)

+ *

+ */

+static u32int ext4_ialloc_bgidx_to_inode(struct ext4_sblock *sb,

+					   u32int index, u32int bgid)

+{

+	u32int per_group = ext4_get32(sb, inodes_per_group);

+	u32int group_base = bgid * per_group;

+	return group_base + (index + 1);

+}

+/**@brief Compute block group number from the i-node number.

+ * @param sb    Superblock

+ * @param inode I-node number to find the block group for (starts at 1)

+ * @return Block group number computed from i-node number

+ */

+static u32int ext4_ialloc_get_bgid_of_inode(struct ext4_sblock *sb,

+					      u32int inode)

+{

+	u32int per_group = ext4_get32(sb, inodes_per_group);

+	u32int zero_based = inode - 1;

+	return zero_based / per_group;

+}

+/**@brief Compute the checksum of an i-node bitmap.

+ * @param sb     Superblock

+ * @param bitmap I-node bitmap of one block group

+ * @return crc32c over the filesystem UUID followed by the bitmap bytes, or 0

+ *         when the METADATA_CSUM feature is not enabled

+ */

+static u32int ext4_ialloc_bitmap_csum(struct ext4_sblock *sb,	void *bitmap)

+{

+	u32int per_group, seed;

+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))

+		return 0;

+	per_group = ext4_get32(sb, inodes_per_group);

+	/* Seed with the checksum of the filesystem UUID */

+	seed = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));

+	/* Continue over the bitmap: one bit per i-node, rounded up to bytes */

+	return ext4_crc32c(seed, bitmap, (per_group + 7) / 8);

+}

+/**@brief Store the i-node bitmap checksum in a block group descriptor.

+ *

+ * Does nothing unless the METADATA_CSUM feature is enabled. The upper half

+ * is stored only when the descriptor size equals

+ * EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE.

+ * @param sb     Superblock

+ * @param bg     Block group descriptor to update

+ * @param bitmap I-node bitmap the checksum is computed over

+ */

+void ext4_ialloc_set_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg, void *bitmap)

+{

+	int dsize = ext4_sb_get_desc_size(sb);

+	u32int sum = ext4_ialloc_bitmap_csum(sb, bitmap);

+	u16int low = to_le16(sum & 0xFFFF);

+	u16int high = to_le16(sum >> 16);

+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {

+		bg->inode_bitmap_csum_lo = low;

+		/* Only the large descriptor has room for the upper half */

+		if (dsize == EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)

+			bg->inode_bitmap_csum_hi = high;

+	}

+}

+/**@brief Check the i-node bitmap checksum stored in a block group descriptor.

+ * @param sb     Superblock

+ * @param bg     Block group descriptor holding the stored checksum

+ * @param bitmap I-node bitmap to verify

+ * @return true when METADATA_CSUM is disabled or the stored checksum matches

+ *         (the upper half is compared only when the descriptor size equals

+ *         EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE), false otherwise

+ */

+static bool

+ext4_ialloc_verify_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg, void *bitmap)

+{

+	int dsize = ext4_sb_get_desc_size(sb);

+	u32int sum = ext4_ialloc_bitmap_csum(sb, bitmap);

+	u16int low = to_le16(sum & 0xFFFF);

+	u16int high = to_le16(sum >> 16);

+	/* Nothing to verify without metadata checksums */

+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))

+		return true;

+	if (bg->inode_bitmap_csum_lo != low)

+		return false;

+	if (dsize != EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)

+		return true;

+	if (bg->inode_bitmap_csum_hi != high)

+		return false;

+	return true;

+}

+/**@brief Free an i-node.

+ *

+ * Clears the i-node's bit in the bitmap of its block group, refreshes the

+ * bitmap checksum, and updates the free i-node counters of the block group

+ * and the superblock. For directories the group's used-directories counter

+ * is decremented as well. A bitmap checksum mismatch is only reported through

+ * ext4_dbg(); it does not abort the operation.

+ * @param fs     Filesystem the i-node belongs to

+ * @param index  Absolute number of the i-node to free

+ * @param is_dir True when the i-node is a directory

+ * @return 0 on success, otherwise the error code of the failing helper

+ */

+int ext4_ialloc_free_inode(struct ext4_fs *fs, u32int index, bool is_dir)

+{

+	struct ext4_sblock *sb = &fs->sb;

+	/* Compute index of block group and load it */

+	u32int block_group = ext4_ialloc_get_bgid_of_inode(sb, index);

+	struct ext4_block_group_ref bg_ref;

+	int rc = ext4_fs_get_block_group_ref(fs, block_group, &bg_ref);

+	if (rc != 0)

+		return rc;

+	struct ext4_bgroup *bg = bg_ref.block_group;

+	/* Load i-node bitmap */

+	ext4_fsblk_t bitmap_block_addr =

+	    ext4_bg_get_inode_bitmap(bg, sb);

+	struct ext4_block b;

+	rc = ext4_trans_block_get(fs->bdev, &b, bitmap_block_addr);

+	if (rc != 0) {

+		/* Do not leak the block group reference taken above */

+		ext4_fs_put_block_group_ref(&bg_ref);

+		return rc;

+	}

+	if (!ext4_ialloc_verify_bitmap_csum(sb, bg, b.data)) {

+		ext4_dbg(DEBUG_IALLOC,

+			DBG_WARN "Bitmap checksum failed."

+			"Group: %ud\n",

+			bg_ref.index);

+	}

+	/* Free i-node in the bitmap */

+	u32int index_in_group = ext4_ialloc_inode_to_bgidx(sb, index);

+	ext4_bmap_bit_clr(b.data, index_in_group);

+	ext4_ialloc_set_bitmap_csum(sb, bg, b.data);

+	ext4_trans_set_block_dirty(b.buf);

+	/* Put back the block with bitmap */

+	rc = ext4_block_set(fs->bdev, &b);

+	if (rc != 0) {

+		/* Error in saving bitmap */

+		ext4_fs_put_block_group_ref(&bg_ref);

+		return rc;

+	}

+	/* If released i-node is a directory, decrement used directories count

+	 */

+	if (is_dir) {

+		u32int bg_used_dirs = ext4_bg_get_used_dirs_count(bg, sb);

+		bg_used_dirs--;

+		ext4_bg_set_used_dirs_count(bg, sb, bg_used_dirs);

+	}

+	/* Update block group free inodes count */

+	u32int free_inodes = ext4_bg_get_free_inodes_count(bg, sb);

+	free_inodes++;

+	ext4_bg_set_free_inodes_count(bg, sb, free_inodes);

+	bg_ref.dirty = true;

+	/* Put back the modified block group */

+	rc = ext4_fs_put_block_group_ref(&bg_ref);

+	if (rc != 0)

+		return rc;

+	/* Update superblock free inodes count */

+	ext4_set32(sb, free_inodes_count,

+		   ext4_get32(sb, free_inodes_count) + 1);

+	return 0;

+}

+/**@brief Allocate a free i-node.

+ *

+ * Block groups are scanned starting with fs->last_inode_bg_id; when the end

+ * is reached the scan wraps around once and covers the groups before the

+ * starting one. In the first group that has a free i-node the bitmap bit is

+ * set, the bitmap checksum is refreshed, and the counters of the block group

+ * and the superblock are updated. A bitmap checksum mismatch is only

+ * reported through ext4_dbg().

+ * @param fs     Filesystem to allocate from

+ * @param idx    Output: absolute number of the allocated i-node

+ * @param is_dir True when the new i-node is a directory; increments the

+ *               group's used-directories counter

+ * @return 0 on success, -1 with the error string set to Enospc when no block

+ *         group has a free i-node, otherwise the error code of the failing

+ *         helper

+ */

+int ext4_ialloc_alloc_inode(struct ext4_fs *fs, u32int *idx, bool is_dir)

+{

+	struct ext4_sblock *sb = &fs->sb;

+	u32int bgid = fs->last_inode_bg_id;

+	u32int bg_count = ext4_block_group_cnt(sb);

+	u32int sb_free_inodes = ext4_get32(sb, free_inodes_count);

+	bool rewind = false;

+	/* Try to find free i-node in all block groups */

+	while (bgid <= bg_count) {

+		if (bgid == bg_count) {

+			/* Second time at the end: every group was visited */

+			if (rewind)

+				break;

+			/* Wrap around and scan the groups before the start */

+			bg_count = fs->last_inode_bg_id;

+			bgid = 0;

+			rewind = true;

+			continue;

+		}

+		/* Load block group to check */

+		struct ext4_block_group_ref bg_ref;

+		int rc = ext4_fs_get_block_group_ref(fs, bgid, &bg_ref);

+		if (rc != 0)

+			return rc;

+		struct ext4_bgroup *bg = bg_ref.block_group;

+		/* Read necessary values for algorithm */

+		u32int free_inodes = ext4_bg_get_free_inodes_count(bg, sb);

+		u32int used_dirs = ext4_bg_get_used_dirs_count(bg, sb);

+		/* Check if this block group is good candidate for allocation */

+		if (free_inodes > 0) {

+			/* Load block with bitmap */

+			ext4_fsblk_t bmp_blk_add = ext4_bg_get_inode_bitmap(bg, sb);

+			struct ext4_block b;

+			rc = ext4_trans_block_get(fs->bdev, &b, bmp_blk_add);

+			if (rc != 0) {

+				ext4_fs_put_block_group_ref(&bg_ref);

+				return rc;

+			}

+			if (!ext4_ialloc_verify_bitmap_csum(sb, bg, b.data)) {

+				ext4_dbg(DEBUG_IALLOC,

+					DBG_WARN "Bitmap checksum failed."

+					"Group: %ud\n",

+					bg_ref.index);

+			}

+			/* Try to allocate i-node in the bitmap */

+			u32int inodes_in_bg;

+			u32int idx_in_bg;

+			inodes_in_bg = ext4_inodes_in_group_cnt(sb, bgid);

+			bool no_space = false;

+			rc = ext4_bmap_bit_find_clr(b.data, 0, inodes_in_bg, &idx_in_bg, &no_space);

+			/* The counter promised a free i-node but the bitmap

+			 * has none: release everything and move on

+			 */

+			if (no_space) {

+				rc = ext4_block_set(fs->bdev, &b);

+				if (rc != 0) {

+					ext4_fs_put_block_group_ref(&bg_ref);

+					return rc;

+				}

+				rc = ext4_fs_put_block_group_ref(&bg_ref);

+				if (rc != 0)

+					return rc;

+				/* Advance, otherwise the same group would be

+				 * retried forever

+				 */

+				++bgid;

+				continue;

+			}

+			/* The search itself failed: bail out before touching

+			 * the bitmap with an index that was never set

+			 */

+			if (rc != 0) {

+				ext4_block_set(fs->bdev, &b);

+				ext4_fs_put_block_group_ref(&bg_ref);

+				return rc;

+			}

+			ext4_bmap_bit_set(b.data, idx_in_bg);

+			/* Free i-node found, save the bitmap */

+			ext4_ialloc_set_bitmap_csum(sb,bg,

+						    b.data);

+			ext4_trans_set_block_dirty(b.buf);

+			rc = ext4_block_set(fs->bdev, &b);

+			if (rc != 0) {

+				ext4_fs_put_block_group_ref(&bg_ref);

+				return rc;

+			}

+			/* Modify filesystem counters */

+			free_inodes--;

+			ext4_bg_set_free_inodes_count(bg, sb, free_inodes);

+			/* Increment used directories counter */

+			if (is_dir) {

+				used_dirs++;

+				ext4_bg_set_used_dirs_count(bg, sb, used_dirs);

+			}

+			/* Shrink the unused tail of the i-node table when the

+			 * new i-node lies inside it

+			 */

+			u32int unused =

+			    ext4_bg_get_itable_unused(bg, sb);

+			u32int free = inodes_in_bg - unused;

+			if (idx_in_bg >= free) {

+				unused = inodes_in_bg - (idx_in_bg + 1);

+				ext4_bg_set_itable_unused(bg, sb, unused);

+			}

+			/* Save modified block group */

+			bg_ref.dirty = true;

+			rc = ext4_fs_put_block_group_ref(&bg_ref);

+			if (rc != 0)

+				return rc;

+			/* Update superblock */

+			sb_free_inodes--;

+			ext4_set32(sb, free_inodes_count, sb_free_inodes);

+			/* Compute the absolute i-node number */

+			*idx = ext4_ialloc_bgidx_to_inode(sb, idx_in_bg, bgid);

+			/* Remember where to start the next search */

+			fs->last_inode_bg_id = bgid;

+			return 0;

+		}

+		/* Block group not modified, put it and jump to the next block

+		 * group */

+		rc = ext4_fs_put_block_group_ref(&bg_ref);

+		if (rc != 0)

+			return rc;

+		++bgid;

+	}

+	werrstr(Enospc);

+	return -1;

+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_inode.c

@@ -1,0 +1,365 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_inode.h"

+#include "ext4_super.h"

+/**@brief  Compute number of bits for block count.

+ *         Starts from 8 and adds one bit per halving of the block

+ *         size; the size is halved at least once and halving stops

+ *         as soon as the remainder is no larger than 256.

+ * @param  block_size Filesystem block size

+ * @return Number of bits

+ */

+static u32int ext4_inode_block_bits_count(u32int block_size)

+{

+	u32int nbits, rest;

+	/* The first halving is unconditional. */

+	nbits = 9;

+	for (rest = block_size >> 1; rest > 256; rest >>= 1)

+		nbits++;

+	return nbits;

+}

+/**@brief  Get the mode of an i-node.

+ *         The upper 16 bits are taken from the Hurd-specific field

+ *         only when the superblock creator OS is Hurd.

+ * @param  sb    Superblock

+ * @param  inode I-node

+ * @return Mode bits */

+u32int ext4_inode_get_mode(struct ext4_sblock *sb, struct ext4_inode *inode)

+{

+	u32int lo, hi;

+	lo = to_le16(inode->mode);

+	if (ext4_get32(sb, creator_os) != EXT4_SUPERBLOCK_OS_HURD)

+		return lo;

+	hi = to_le16(inode->osd2.hurd2.mode_high);

+	return lo | (hi << 16);

+}

+/**@brief  Set the mode of an i-node.

+ *         The low 16 bits always go to the mode field; the high

+ *         16 bits are stored only when the creator OS is Hurd.

+ * @param  sb    Superblock

+ * @param  inode I-node

+ * @param  mode  Mode bits to store */

+void ext4_inode_set_mode(struct ext4_sblock *sb, struct ext4_inode *inode,

+			 u32int mode)

+{

+	u32int lo = mode & 0xFFFF;

+	u32int hi = mode >> 16;

+	inode->mode = to_le16(lo);

+	if (ext4_get32(sb, creator_os) != EXT4_SUPERBLOCK_OS_HURD)

+		return;

+	inode->osd2.hurd2.mode_high = to_le16(hi);

+}

+/**@brief  Get the owner id stored in the i-node.

+ * @param  inode I-node

+ * @return uid field passed through to_le32() */

+u32int ext4_inode_get_uid(struct ext4_inode *inode)

+{

+	u32int owner = to_le32(inode->uid);

+	return owner;

+}

+/**@brief  Store the owner id in the i-node.

+ * @param  inode I-node

+ * @param  uid   Owner id, passed through to_le32() before storing */

+void ext4_inode_set_uid(struct ext4_inode *inode, u32int uid)

+{

+	u32int raw = to_le32(uid);

+	inode->uid = raw;

+}

+/**@brief  Get the size of an i-node in bytes.

+ *         The upper 32 bits are used only when the superblock

+ *         revision is above 0 and the i-node is a regular file.

+ * @param  sb    Superblock

+ * @param  inode I-node

+ * @return Size */

+u64int ext4_inode_get_size(struct ext4_sblock *sb, struct ext4_inode *inode)

+{

+	u64int lo, hi;

+	lo = to_le32(inode->size_lo);

+	if (!(ext4_get32(sb, rev_level) > 0))

+		return lo;

+	if (!ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_FILE))

+		return lo;

+	hi = to_le32(inode->size_hi);

+	return lo | (hi << 32);

+}

+/**@brief  Set the size of an i-node, split into two 32-bit halves.

+ * @param  inode I-node

+ * @param  size  Size to store */

+void ext4_inode_set_size(struct ext4_inode *inode, u64int size)

+{

+	u64int lo = (size << 32) >> 32;

+	u64int hi = size >> 32;

+	inode->size_hi = to_le32(hi);

+	inode->size_lo = to_le32(lo);

+}

+/**@brief  Get the checksum stored in an i-node.

+ *         The high half is read only when the i-node size exceeds

+ *         EXT4_GOOD_OLD_INODE_SIZE.

+ * @param  sb    Superblock

+ * @param  inode I-node

+ * @return Stored checksum */

+u32int ext4_inode_get_csum(struct ext4_sblock *sb, struct ext4_inode *inode)

+{

+	u16int isz = ext4_get16(sb, inode_size);

+	u32int lo, hi;

+	lo = to_le16(inode->osd2.linux2.checksum_lo);

+	if (isz <= EXT4_GOOD_OLD_INODE_SIZE)

+		return lo;

+	hi = to_le16(inode->checksum_hi);

+	return lo | (hi << 16);

+}

+/**@brief  Store a checksum in an i-node.

+ *         The high half is written only when the i-node size

+ *         exceeds EXT4_GOOD_OLD_INODE_SIZE.

+ * @param  sb       Superblock

+ * @param  inode    I-node

+ * @param  checksum Checksum to store */

+void ext4_inode_set_csum(struct ext4_sblock *sb, struct ext4_inode *inode,

+			u32int checksum)

+{

+	u16int isz = ext4_get16(sb, inode_size);

+	u32int lo = checksum & 0xFFFF;

+	u32int hi = checksum >> 16;

+	inode->osd2.linux2.checksum_lo = to_le16(lo);

+	if (isz <= EXT4_GOOD_OLD_INODE_SIZE)

+		return;

+	inode->checksum_hi = to_le16(hi);

+}

+/**@brief  Get the access time field of an i-node.

+ * @param  inode I-node

+ * @return access_time passed through to_le32() */

+u32int ext4_inode_get_access_time(struct ext4_inode *inode)

+{

+	u32int when = to_le32(inode->access_time);

+	return when;

+}

+/**@brief  Set the access time field of an i-node.

+ * @param  inode I-node

+ * @param  time  Value to store */

+void ext4_inode_set_access_time(struct ext4_inode *inode, u32int time)

+{

+	u32int raw = to_le32(time);

+	inode->access_time = raw;

+}

+/**@brief  Get the i-node change time field.

+ * @param  inode I-node

+ * @return change_inode_time passed through to_le32() */

+u32int ext4_inode_get_change_inode_time(struct ext4_inode *inode)

+{

+	u32int when = to_le32(inode->change_inode_time);

+	return when;

+}

+/**@brief  Set the i-node change time field.

+ * @param  inode I-node

+ * @param  time  Value to store */

+void ext4_inode_set_change_inode_time(struct ext4_inode *inode, u32int time)

+{

+	u32int raw = to_le32(time);

+	inode->change_inode_time = raw;

+}

+/**@brief  Get the modification time field of an i-node.

+ * @param  inode I-node

+ * @return modification_time passed through to_le32() */

+u32int ext4_inode_get_modif_time(struct ext4_inode *inode)

+{

+	u32int when = to_le32(inode->modification_time);

+	return when;

+}

+/**@brief  Set the modification time field of an i-node.

+ * @param  inode I-node

+ * @param  time  Value to store */

+void ext4_inode_set_modif_time(struct ext4_inode *inode, u32int time)

+{

+	u32int raw = to_le32(time);

+	inode->modification_time = raw;

+}

+/**@brief  Get the deletion time field of an i-node.

+ * @param  inode I-node

+ * @return deletion_time passed through to_le32() */

+u32int ext4_inode_get_del_time(struct ext4_inode *inode)

+{

+	u32int when = to_le32(inode->deletion_time);

+	return when;

+}

+/**@brief  Set the deletion time field of an i-node.

+ * @param  inode I-node

+ * @param  time  Value to store */

+void ext4_inode_set_del_time(struct ext4_inode *inode, u32int time)

+{

+	u32int raw = to_le32(time);

+	inode->deletion_time = raw;

+}

+/**@brief  Get the creation time field of an i-node.

+ * @param  inode I-node

+ * @return crtime passed through to_le32() */

+u32int ext4_inode_get_creation_time(struct ext4_inode *inode)

+{

+	u32int when = to_le32(inode->crtime);

+	return when;

+}

+/**@brief  Get the group id stored in the i-node.

+ * @param  inode I-node

+ * @return gid field passed through to_le32() */

+u32int ext4_inode_get_gid(struct ext4_inode *inode)

+{

+	u32int group = to_le32(inode->gid);

+	return group;

+}

+/**@brief  Store the group id in the i-node.

+ * @param  inode I-node

+ * @param  gid   Group id, passed through to_le32() before storing */

+void ext4_inode_set_gid(struct ext4_inode *inode, u32int gid)

+{

+	u32int raw = to_le32(gid);

+	inode->gid = raw;

+}

+/**@brief  Get the link count of an i-node.

+ * @param  inode I-node

+ * @return links_count passed through to_le16() */

+u16int ext4_inode_get_links_cnt(struct ext4_inode *inode)

+{

+	u16int nlinks = to_le16(inode->links_count);

+	return nlinks;

+}

+/**@brief  Set the link count of an i-node.

+ * @param  inode I-node

+ * @param  cnt   Link count to store */

+void ext4_inode_set_links_cnt(struct ext4_inode *inode, u16int cnt)

+{

+	u16int raw = to_le16(cnt);

+	inode->links_count = raw;

+}

+/**@brief  Get the block count recorded in an i-node.

+ *         Without the HUGE_FILE read-only feature only the low

+ *         32 bits are used. With it the count is 48 bits wide, and

+ *         when the i-node carries the HUGE_FILE flag the value is

+ *         shifted left by (block size bits - 9).

+ * @param  sb    Superblock

+ * @param  inode I-node

+ * @return Block count */

+u64int ext4_inode_get_blocks_count(struct ext4_sblock *sb,

+				     struct ext4_inode *inode)

+{

+	u64int total;

+	u32int blksz, shift;

+	total = to_le32(inode->blocks_count_lo);

+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_HUGE_FILE))

+		return total;

+	/* 48-bit field */

+	total |= (u64int)to_le16(inode->osd2.linux2.blocks_high) << 32;

+	if (!ext4_inode_has_flag(inode, EXT4_INODE_FLAG_HUGE_FILE))

+		return total;

+	blksz = ext4_sb_get_block_size(sb);

+	shift = ext4_inode_block_bits_count(blksz) - 9;

+	return total << shift;

+}

+/**@brief  Set the block count recorded in an i-node.

+ *         Counts up to 48 bits are stored directly with the

+ *         HUGE_FILE i-node flag cleared; larger counts are shifted

+ *         right by (block size bits - 9) and the flag is set.

+ *         Counts above 32 bits require the HUGE_FILE read-only

+ *         feature in the superblock.

+ * @param  sb    Superblock

+ * @param  inode I-node

+ * @param  count Block count to store

+ * @return 0 on success, -1 with Einval set via werrstr() when the

+ *         count needs more than 32 bits and the feature is absent */

+int ext4_inode_set_blocks_count(struct ext4_sblock *sb,

+				struct ext4_inode *inode, u64int count)

+{

+	u64int limit32, limit48;

+	u32int blksz, shift;

+	limit32 = ~(u64int)0 >> 32;

+	limit48 = ~(u64int)0 >> 16;

+	/* Check if there can be used huge files (many blocks) */

+	if (count > limit32 &&

+	    !ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_HUGE_FILE)) {

+		werrstr(Einval);

+		return -1;

+	}

+	if (count <= limit48) {

+		/* For counts that fit in 32 bits the high part is 0. */

+		inode->blocks_count_lo = to_le32((u32int)count);

+		inode->osd2.linux2.blocks_high = to_le16((u16int)(count >> 32));

+		ext4_inode_clear_flag(inode, EXT4_INODE_FLAG_HUGE_FILE);

+		return 0;

+	}

+	blksz = ext4_sb_get_block_size(sb);

+	shift = ext4_inode_block_bits_count(blksz) - 9;

+	ext4_inode_set_flag(inode, EXT4_INODE_FLAG_HUGE_FILE);

+	count >>= shift;

+	inode->blocks_count_lo = to_le32((u32int)count);

+	inode->osd2.linux2.blocks_high = to_le16((u16int)(count >> 32));

+	return 0;

+}

+/**@brief  Get the flags field of an i-node.

+ * @param  inode I-node

+ * @return flags passed through to_le32() */

+u32int ext4_inode_get_flags(struct ext4_inode *inode)

+{

+	u32int bits = to_le32(inode->flags);

+	return bits;

+}

+/**@brief  Set the flags field of an i-node.

+ * @param  inode I-node

+ * @param  flags Flags to store */

+void ext4_inode_set_flags(struct ext4_inode *inode, u32int flags)

+{

+	u32int raw = to_le32(flags);

+	inode->flags = raw;

+}

+/**@brief  Get the generation field of an i-node.

+ * @param  inode I-node

+ * @return generation passed through to_le32() */

+u32int ext4_inode_get_generation(struct ext4_inode *inode)

+{

+	u32int gen = to_le32(inode->generation);

+	return gen;

+}

+/**@brief  Set the generation field of an i-node.

+ * @param  inode I-node

+ * @param  gen   Generation to store */

+void ext4_inode_set_generation(struct ext4_inode *inode, u32int gen)

+{

+	u32int raw = to_le32(gen);

+	inode->generation = raw;

+}

+/**@brief  Get the extra i-node size.

+ * @param  sb    Superblock

+ * @param  inode I-node

+ * @return extra_isize, or 0 when the i-node size does not exceed

+ *         EXT4_GOOD_OLD_INODE_SIZE */

+u16int ext4_inode_get_extra_isize(struct ext4_sblock *sb,

+				    struct ext4_inode *inode)

+{

+	u16int isz = ext4_get16(sb, inode_size);

+	if (isz <= EXT4_GOOD_OLD_INODE_SIZE)

+		return 0;

+	return to_le16(inode->extra_isize);

+}

+/**@brief  Set the extra i-node size.

+ *         Nothing is written when the i-node size does not exceed

+ *         EXT4_GOOD_OLD_INODE_SIZE.

+ * @param  sb    Superblock

+ * @param  inode I-node

+ * @param  size  Extra size to store */

+void ext4_inode_set_extra_isize(struct ext4_sblock *sb,

+				struct ext4_inode *inode,

+				u16int size)

+{

+	u16int isz = ext4_get16(sb, inode_size);

+	if (isz <= EXT4_GOOD_OLD_INODE_SIZE)

+		return;

+	inode->extra_isize = to_le16(size);

+}

+/**@brief  Get the extended attribute (file ACL) block address.

+ *         When the creator OS is Linux the address is 48 bits wide:

+ *         file_acl_high supplies bits 32..47, mirroring

+ *         ext4_inode_set_file_acl() which stores (acl >> 32) there.

+ * @param  inode I-node

+ * @param  sb    Superblock

+ * @return File ACL block address */

+u64int ext4_inode_get_file_acl(struct ext4_inode *inode,

+				 struct ext4_sblock *sb)

+{

+	u64int v = to_le32(inode->file_acl_lo);

+	/* Widen before shifting so the high part lands above the low

+	 * 32 bits instead of overlapping them. */

+	if (ext4_get32(sb, creator_os) == EXT4_SUPERBLOCK_OS_LINUX)

+		v |= (u64int)to_le16(inode->osd2.linux2.file_acl_high) << 32;

+	return v;

+}

+/**@brief  Set the extended attribute (file ACL) block address.

+ *         The low 32 bits are always stored; bits 32 and up are

+ *         stored (truncated to 16 bits) only when the creator OS is

+ *         Linux.

+ * @param  inode I-node

+ * @param  sb    Superblock

+ * @param  acl   Block address to store */

+void ext4_inode_set_file_acl(struct ext4_inode *inode, struct ext4_sblock *sb,

+			     u64int acl)

+{

+	u64int lo = (acl << 32) >> 32;

+	u16int hi = (u16int)(acl >> 32);

+	inode->file_acl_lo = to_le32(lo);

+	if (ext4_get32(sb, creator_os) != EXT4_SUPERBLOCK_OS_LINUX)

+		return;

+	inode->osd2.linux2.file_acl_high = to_le16(hi);

+}

+/**@brief  Get an entry of the i-node block array.

+ * @param  inode I-node

+ * @param  idx   Index into inode->blocks (not range-checked)

+ * @return Entry passed through to_le32() */

+u32int ext4_inode_get_direct_block(struct ext4_inode *inode, u32int idx)

+{

+	u32int blk = to_le32(inode->blocks[idx]);

+	return blk;

+}

+/**@brief  Set an entry of the i-node block array.

+ * @param  inode I-node

+ * @param  idx   Index into inode->blocks (not range-checked)

+ * @param  block Value to store */

+void ext4_inode_set_direct_block(struct ext4_inode *inode, u32int idx,

+				 u32int block)

+{

+	u32int raw = to_le32(block);

+	inode->blocks[idx] = raw;

+}

+/**@brief  Get an indirect block entry of the i-node block array.

+ * @param  inode I-node

+ * @param  idx   Index relative to EXT4_INODE_INDIRECT_BLOCK

+ * @return Entry passed through to_le32() */

+u32int ext4_inode_get_indirect_block(struct ext4_inode *inode, u32int idx)

+{

+	u32int slot = idx + EXT4_INODE_INDIRECT_BLOCK;

+	return to_le32(inode->blocks[slot]);

+}

+/**@brief  Set an indirect block entry of the i-node block array.

+ * @param  inode I-node

+ * @param  idx   Index relative to EXT4_INODE_INDIRECT_BLOCK

+ * @param  block Value to store */

+void ext4_inode_set_indirect_block(struct ext4_inode *inode, u32int idx,

+				   u32int block)

+{

+	u32int slot = idx + EXT4_INODE_INDIRECT_BLOCK;

+	inode->blocks[slot] = to_le32(block);

+}

+/**@brief  Get the device number stored in an i-node.

+ *         Block slot 0 wins when it is non-zero, otherwise slot 1

+ *         is returned.

+ * @param  inode I-node

+ * @return Device number */

+u32int ext4_inode_get_dev(struct ext4_inode *inode)

+{

+	u32int slot0 = ext4_inode_get_direct_block(inode, 0);

+	u32int slot1 = ext4_inode_get_direct_block(inode, 1);

+	return slot0 ? slot0 : slot1;

+}

+/**@brief  Store a device number in an i-node.

+ *         Values that fit in 16 bits go to block slot 0, larger

+ *         values to slot 1. The slot not used is zeroed so that

+ *         ext4_inode_get_dev(), which prefers a non-zero slot 0,

+ *         cannot return a stale value left by an earlier call.

+ * @param  inode I-node

+ * @param  dev   Device number */

+void ext4_inode_set_dev(struct ext4_inode *inode, u32int dev)

+{

+	if (dev & ~0xFFFF) {

+		ext4_inode_set_direct_block(inode, 0, 0);

+		ext4_inode_set_direct_block(inode, 1, dev);

+	} else {

+		ext4_inode_set_direct_block(inode, 0, dev);

+		ext4_inode_set_direct_block(inode, 1, 0);

+	}

+}

+/**@brief  Get the type bits of an i-node mode.

+ * @param  sb    Superblock

+ * @param  inode I-node

+ * @return Mode masked with EXT4_INODE_MODE_TYPE_MASK */

+u32int ext4_inode_type(struct ext4_sblock *sb, struct ext4_inode *inode)

+{

+	u32int mode = ext4_inode_get_mode(sb, inode);

+	return mode & EXT4_INODE_MODE_TYPE_MASK;

+}

+/**@brief  Check whether an i-node is of the given type.

+ * @param  sb    Superblock

+ * @param  inode I-node

+ * @param  type  Type value to compare against

+ * @return true when ext4_inode_type() equals type */

+bool ext4_inode_is_type(struct ext4_sblock *sb, struct ext4_inode *inode,

+			u32int type)

+{

+	u32int actual = ext4_inode_type(sb, inode);

+	return actual == type;

+}

+/**@brief  Check whether any of the given flag bits is set.

+ *         The masked value is compared against zero so the result

+ *         is exactly true/false regardless of how bool is defined

+ *         (NOTE(review): a narrow bool typedef would otherwise drop

+ *         high flag bits - confirm against the project's bool).

+ * @param  inode I-node

+ * @param  f     Flag mask

+ * @return true when (flags & f) is non-zero */

+bool ext4_inode_has_flag(struct ext4_inode *inode, u32int f)

+{

+	return (ext4_inode_get_flags(inode) & f) != 0;

+}

+/**@brief  Clear flag bits in an i-node.

+ * @param  inode I-node

+ * @param  f     Flag mask to clear */

+void ext4_inode_clear_flag(struct ext4_inode *inode, u32int f)

+{

+	u32int kept = ext4_inode_get_flags(inode) & ~f;

+	ext4_inode_set_flags(inode, kept);

+}

+/**@brief  Set flag bits in an i-node.

+ * @param  inode I-node

+ * @param  f     Flag mask to set */

+void ext4_inode_set_flag(struct ext4_inode *inode, u32int f)

+{

+	u32int merged = ext4_inode_get_flags(inode) | f;

+	ext4_inode_set_flags(inode, merged);

+}

+/**@brief  Check whether an i-node may be truncated.

+ *         Append-only and immutable i-nodes never can; otherwise

+ *         only regular files, directories and soft links can.

+ * @param  sb    Superblock

+ * @param  inode I-node

+ * @return true when truncation is allowed */

+bool ext4_inode_can_truncate(struct ext4_sblock *sb, struct ext4_inode *inode)

+{

+	if (ext4_inode_has_flag(inode, EXT4_INODE_FLAG_APPEND))

+		return false;

+	if (ext4_inode_has_flag(inode, EXT4_INODE_FLAG_IMMUTABLE))

+		return false;

+	if (ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_FILE))

+		return true;

+	if (ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_DIRECTORY))

+		return true;

+	if (ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_SOFTLINK))

+		return true;

+	return false;

+}

+/**@brief  View the i-node block array as an extent header.

+ * @param  inode I-node

+ * @return inode->blocks cast to an extent header pointer */

+struct ext4_extent_header *

+ext4_inode_get_extent_header(struct ext4_inode *inode)

+{

+	struct ext4_extent_header *eh;

+	eh = (struct ext4_extent_header *)inode->blocks;

+	return eh;

+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_journal.c

@@ -1,0 +1,2232 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_fs.h"

+#include "ext4_super.h"

+#include "ext4_journal.h"

+#include "ext4_blockdev.h"

+#include "ext4_crc32.h"

+#include "ext4_journal.h"

+/**@brief  Revoke entry used during journal replay.*/

+struct revoke_entry {

+	/**@brief  Block number not to be replayed.*/

+	ext4_fsblk_t block;

+	/**@brief  Records of @block are not replayed for

+	 *         transactions whose id is not greater

+	 *         than trans_id (jbd_replay_block_tags()

+	 *         skips when the id difference is <= 0).*/

+	u32int trans_id;

+	/**@brief  Revoke tree node.*/

+	RB_ENTRY(revoke_entry) revoke_node;

+};

+/**@brief  Valid journal replay information.*/

+struct recover_info {

+	/**@brief  Starting transaction id.*/

+	u32int start_trans_id;

+	/**@brief  Ending transaction id.*/

+	u32int last_trans_id;

+	/**@brief  Used as internal argument.*/

+	u32int this_trans_id;

+	/**@brief  Number of transactions gone through.*/

+	u32int trans_cnt;

+	/**@brief  RB-tree storing revoke entries, keyed by block

+	 *         number (see jbd_revoke_entry_cmp()).*/

+	RB_HEAD(jbd_revoke, revoke_entry) revoke_root;

+};

+/**@brief  Journal replay internal arguments, passed to

+ *         jbd_replay_block_tags() through its void pointer.*/

+struct replay_arg {

+	/**@brief  Journal replay information.*/

+	struct recover_info *info;

+	/**@brief  Journal block we are currently on; advanced

+	 *         (with wrap-around) for every tag replayed.*/

+	u32int *this_block;

+	/**@brief  Transaction id we are currently on.*/

+	u32int this_trans_id;

+};

+/* Make sure we wrap around the log correctly!

+ * When var has reached the journal's maxlen it is pulled back by

+ * (maxlen - first). var is evaluated more than once and must be an

+ * lvalue; it is parenthesized so expressions such as *p expand

+ * safely. */

+#define wrap(sb, var)						\

+do {									\

+	if ((var) >= jbd_get32((sb), maxlen))				\

+		(var) -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first));	\

+} while (0)

+/**@brief  Signed distance between two transaction ids.

+ *         The subtraction is done on unsigned values and the

+ *         result is then interpreted as signed.

+ * @return x - y as s32int */

+static inline s32int

+trans_id_diff(u32int x, u32int y)

+{

+	u32int delta = x - y;

+	return (s32int)delta;

+}

+/**@brief  Order revoke entries by block number.

+ * @return 1, -1 or 0 when a is greater than, less than or equal to b */

+static int

+jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)

+{

+	if (a->block == b->block)

+		return 0;

+	return a->block > b->block ? 1 : -1;

+}

+/**@brief  Order block records by logical block address.

+ * @return 1, -1 or 0 when a is greater than, less than or equal to b */

+static int

+jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)

+{

+	if (a->lba == b->lba)

+		return 0;

+	return a->lba > b->lba ? 1 : -1;

+}

+/**@brief  Order revoke records by logical block address.

+ * @return 1, -1 or 0 when a is greater than, less than or equal to b */

+static int

+jbd_revoke_rec_cmp(struct jbd_revoke_rec *a, struct jbd_revoke_rec *b)

+{

+	if (a->lba == b->lba)

+		return 0;

+	return a->lba > b->lba ? 1 : -1;

+}

+/* Red-black tree code for the three trees used in this file; each

+ * is ordered by the comparator defined above. */

+RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,

+		     jbd_revoke_entry_cmp, static inline)

+RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,

+		     jbd_block_rec_cmp, static inline)

+RB_GENERATE_INTERNAL(jbd_revoke_tree, jbd_revoke_rec, revoke_node,

+		     jbd_revoke_rec_cmp, static inline)

+/* Revoke entries are allocated through ext4_calloc() and released

+ * with ext4_free(). */

+#define jbd_alloc_revoke_entry() ext4_calloc(1, sizeof(struct revoke_entry))

+#define jbd_free_revoke_entry(addr) ext4_free(addr)

+/**@brief  Report which metadata checksum version the journal uses.

+ *         CSUM_V2 is tested first and takes precedence.

+ * @param  jbd_sb jbd superblock

+ * @return 2 or 3 for the matching feature, 0 when neither is set */

+static int jbd_has_csum(struct jbd_sb *jbd_sb)

+{

+	int version = 0;

+	if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))

+		version = 2;

+	else if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))

+		version = 3;

+	return version;

+}

+/**@brief  Compute the checksum of the jbd superblock.

+ *         The stored checksum field is zeroed for the computation

+ *         and restored afterwards.

+ * @param  jbd_sb jbd superblock

+ * @return crc32c over JBD_SUPERBLOCK_SIZE bytes, or 0 when the

+ *         journal has no checksum feature */

+static u32int jbd_sb_csum(struct jbd_sb *jbd_sb)

+{

+	u32int saved, crc;

+	if (!jbd_has_csum(jbd_sb))

+		return 0;

+	saved = jbd_sb->checksum;

+	jbd_set32(jbd_sb, checksum, 0);

+	/* Calculate crc32c checksum against the whole superblock */

+	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, JBD_SUPERBLOCK_SIZE);

+	jbd_sb->checksum = saved;

+	return crc;

+}

+/**@brief  Recompute and store the jbd superblock checksum.

+ *         Does nothing when the journal has no checksum feature.

+ * @param  jbd_sb jbd superblock */

+static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)

+{

+	if (jbd_has_csum(jbd_sb))

+		jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));

+}

+/**@brief  Verify the jbd superblock checksum.

+ * @param  jbd_sb jbd superblock

+ * @return true when the journal has no checksum feature or the

+ *         computed checksum matches the stored one */

+static bool

+jbd_verify_sb_csum(struct jbd_sb *jbd_sb)

+{

+	if (jbd_has_csum(jbd_sb))

+		return jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);

+	return true;

+}

+/**@brief  Compute the checksum of a journal metadata block.

+ *         The checksum lives in a jbd_block_tail at the end of the

+ *         block; it is zeroed for the computation and restored.

+ * @param  jbd_fs jbd filesystem

+ * @param  bhdr   start of the metadata block

+ * @return crc32c seeded with the journal uuid and run over the

+ *         whole block, or 0 without a checksum feature */

+static u32int jbd_meta_csum(struct jbd_fs *jbd_fs,

+			      struct jbd_bhdr *bhdr)

+{

+	struct jbd_block_tail *tail;

+	u32int blksz, saved, crc;

+	if (!jbd_has_csum(&jbd_fs->sb))

+		return 0;

+	blksz = jbd_get32(&jbd_fs->sb, blocksize);

+	tail = (struct jbd_block_tail *)((char *)bhdr + blksz -

+			sizeof(struct jbd_block_tail));

+	saved = tail->checksum;

+	tail->checksum = 0;

+	/* First calculate crc32c checksum against fs uuid */

+	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,

+			  sizeof(jbd_fs->sb.uuid));

+	/* Calculate crc32c checksum against the whole block */

+	crc = ext4_crc32c(crc, bhdr, blksz);

+	tail->checksum = saved;

+	return crc;

+}

+/**@brief  Store the checksum of a journal metadata block in its

+ *         block tail, passed through to_be32().

+ *         Does nothing when the journal has no checksum feature.

+ * @param  jbd_fs jbd filesystem

+ * @param  bhdr   start of the metadata block */

+static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,

+			      struct jbd_bhdr *bhdr)

+{

+	struct jbd_block_tail *tail;

+	u32int blksz;

+	if (!jbd_has_csum(&jbd_fs->sb))

+		return;

+	blksz = jbd_get32(&jbd_fs->sb, blocksize);

+	tail = (struct jbd_block_tail *)((char *)bhdr + blksz -

+			sizeof(struct jbd_block_tail));

+	tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));

+}

+/**@brief  Verify the checksum of a journal metadata block.

+ * @param  jbd_fs jbd filesystem

+ * @param  bhdr   start of the metadata block

+ * @return true when the journal has no checksum feature or the

+ *         computed checksum matches the one in the block tail */

+static bool

+jbd_verify_meta_csum(struct jbd_fs *jbd_fs,

+		     struct jbd_bhdr *bhdr)

+{

+	struct jbd_block_tail *tail;

+	u32int blksz;

+	if (!jbd_has_csum(&jbd_fs->sb))

+		return true;

+	blksz = jbd_get32(&jbd_fs->sb, blocksize);

+	tail = (struct jbd_block_tail *)((char *)bhdr + blksz -

+			sizeof(struct jbd_block_tail));

+	return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);

+}

+/**@brief  Compute the checksum of a commit block.

+ *         chksum_type, chksum_size and chksum[0] are zeroed for

+ *         the computation and restored afterwards.

+ * @param  jbd_fs jbd filesystem

+ * @param  header commit block header (start of the block)

+ * @return crc32c seeded with the journal uuid and run over the

+ *         whole block, or 0 without a checksum feature */

+static u32int jbd_commit_csum(struct jbd_fs *jbd_fs,

+			      struct jbd_commit_header *header)

+{

+	u8int saved_type, saved_size;

+	u32int saved_sum, blksz, crc;

+	if (!jbd_has_csum(&jbd_fs->sb))

+		return 0;

+	saved_type = header->chksum_type;

+	saved_size = header->chksum_size;

+	saved_sum = header->chksum[0];

+	blksz = jbd_get32(&jbd_fs->sb, blocksize);

+	header->chksum_type = 0;

+	header->chksum_size = 0;

+	header->chksum[0] = 0;

+	/* First calculate crc32c checksum against fs uuid */

+	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,

+			  sizeof(jbd_fs->sb.uuid));

+	/* Calculate crc32c checksum against the whole block */

+	crc = ext4_crc32c(crc, header, blksz);

+	header->chksum_type = saved_type;

+	header->chksum_size = saved_size;

+	header->chksum[0] = saved_sum;

+	return crc;

+}

+/**@brief  Store the checksum of a commit block in its header.

+ *         The value is passed through to_be32() so that it matches

+ *         what jbd_verify_commit_csum() compares against and what

+ *         jbd_meta_csum_set() does for other metadata blocks.

+ *         Does nothing when the journal has no checksum feature.

+ * @param  jbd_fs jbd filesystem

+ * @param  header commit block header */

+static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,

+			      struct jbd_commit_header *header)

+{

+	if (!jbd_has_csum(&jbd_fs->sb))

+		return;

+	header->chksum_type = 0;

+	header->chksum_size = 0;

+	header->chksum[0] = to_be32(jbd_commit_csum(jbd_fs, header));

+}

+/**@brief  Verify the checksum of a commit block.

+ * @param  jbd_fs jbd filesystem

+ * @param  header commit block header

+ * @return true when the journal has no checksum feature or

+ *         chksum[0] equals the computed checksum after to_be32() */

+static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,

+				   struct jbd_commit_header *header)

+{

+	u32int expect;

+	if (!jbd_has_csum(&jbd_fs->sb))

+		return true;

+	expect = to_be32(jbd_commit_csum(jbd_fs, header));

+	return header->chksum[0] == expect;

+}

+/*

+ * Compute the checksum of one logged data block.

+ *

+ * With a v2/v3 checksum feature the result is a crc32c seeded with

+ * the journal uuid, then the sequence number, then the block.

+ * Otherwise, when the JBD_FEATURE_COMPAT_CHECKSUM test passes, the

+ * block is folded into @csum with ext4_crc32(). In every other case

+ * 0 is returned.

+ *

+ * NOTE: We only make use of @csum parameter when

+ *       JBD_FEATURE_COMPAT_CHECKSUM is enabled.

+ * NOTE(review): that flag is a COMPAT flag but is tested here with

+ *       JBD_HAS_INCOMPAT_FEATURE - confirm this is intended.

+ */

+static u32int jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,

+			       u32int csum,

+			       u32int sequence)

+{

+	u32int blksz, crc;

+	if (jbd_has_csum(&jbd_fs->sb)) {

+		blksz = jbd_get32(&jbd_fs->sb, blocksize);

+		/* First calculate crc32c checksum against fs uuid */

+		crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,

+				  sizeof(jbd_fs->sb.uuid));

+		/* Then calculate crc32c checksum against sequence no. */

+		crc = ext4_crc32c(crc, &sequence, sizeof(u32int));

+		/* Calculate crc32c checksum against the whole block */

+		return ext4_crc32c(crc, buf, blksz);

+	}

+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+				     JBD_FEATURE_COMPAT_CHECKSUM)) {

+		blksz = jbd_get32(&jbd_fs->sb, blocksize);

+		/* Fold the whole block into the running checksum */

+		return ext4_crc32(csum, buf, blksz);

+	}

+	return 0;

+}

+/**@brief  Store a data block checksum in a block tag.

+ *         For checksum version 2 the tag is a jbd_block_tag and

+ *         only the low 16 bits of the checksum are stored; otherwise

+ *         the tag is a jbd_block_tag3 holding all 32 bits. The

+ *         truncation is done before the byte-order conversion so

+ *         the same 16 bits are kept on every host.

+ *         Does nothing when the journal has no checksum feature.

+ * @param  jbd_fs   jbd filesystem

+ * @param  __tag    pointer to the block tag

+ * @param  checksum checksum of the logged block */

+static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,

+				   u32int checksum)

+{

+	int ver = jbd_has_csum(&jbd_fs->sb);

+	if (!ver)

+		return;

+	if (ver == 2) {

+		struct jbd_block_tag *tag = __tag;

+		jbd_set16(tag, checksum, (u16int)checksum);

+	} else {

+		struct jbd_block_tag3 *tag = __tag;

+		tag->checksum = to_be32(checksum);

+	}

+}

+/**@brief  Write jbd superblock to disk.

+ *         The superblock lives in journal i-node block 0; its

+ *         checksum is refreshed before writing. The transfer size

+ *         is JBD_SUPERBLOCK_SIZE, the same span jbd_sb_csum()

+ *         covers.

+ * @param  jbd_fs jbd filesystem

+ * @param  s jbd superblock

+ * @return standard error code*/

+static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)

+{

+	int rc;

+	struct ext4_fs *fs = jbd_fs->inode_ref.fs;

+	u64int offset;

+	ext4_fsblk_t fblock;

+	rc = jbd_inode_bmap(jbd_fs, 0, &fblock);

+	if (rc != 0)

+		return rc;

+	jbd_sb_csum_set(s);

+	offset = fblock * ext4_sb_get_block_size(&fs->sb);

+	return ext4_block_writebytes(fs->bdev, offset, s,

+				     JBD_SUPERBLOCK_SIZE);

+}

+/**@brief  Read jbd superblock from disk.

+ *         The superblock lives in journal i-node block 0. The

+ *         transfer size is JBD_SUPERBLOCK_SIZE, the same span

+ *         jbd_sb_csum() covers.

+ * @param  jbd_fs jbd filesystem

+ * @param  s jbd superblock

+ * @return standard error code*/

+static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)

+{

+	int rc;

+	struct ext4_fs *fs = jbd_fs->inode_ref.fs;

+	u64int offset;

+	ext4_fsblk_t fblock;

+	rc = jbd_inode_bmap(jbd_fs, 0, &fblock);

+	if (rc != 0)

+		return rc;

+	offset = fblock * ext4_sb_get_block_size(&fs->sb);

+	return ext4_block_readbytes(fs->bdev, offset, s,

+				    JBD_SUPERBLOCK_SIZE);

+}

+/**@brief  Verify jbd superblock.

+ *         Checks the magic number, that the block type is one of

+ *         the two superblock types, and finally the checksum.

+ * @param  sb jbd superblock

+ * @return true if jbd superblock is valid */

+static bool jbd_verify_sb(struct jbd_sb *sb)

+{

+	struct jbd_bhdr *hdr = &sb->header;

+	if (jbd_get32(hdr, magic) != JBD_MAGIC_NUMBER)

+		return false;

+	if (jbd_get32(hdr, blocktype) == JBD_SUPERBLOCK ||

+	    jbd_get32(hdr, blocktype) == JBD_SUPERBLOCK_V2)

+		return jbd_verify_sb_csum(sb);

+	return false;

+}

+/**@brief  Write back dirty jbd superblock to disk.

+ *         A clean superblock is left alone; the dirty mark is

+ *         cleared only after a successful write.

+ * @param  jbd_fs jbd filesystem

+ * @return standard error code*/

+static int jbd_write_sb(struct jbd_fs *jbd_fs)

+{

+	int rc;

+	if (!jbd_fs->dirty)

+		return 0;

+	rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);

+	if (rc == 0)

+		jbd_fs->dirty = false;

+	return rc;

+}

+/**@brief  Get reference to jbd filesystem.

+ *         Takes a reference on the journal i-node named by the

+ *         superblock, then reads and verifies the jbd superblock.

+ *         On failure the i-node reference is dropped and jbd_fs is

+ *         zeroed again.

+ * @param  fs Filesystem to load journal of

+ * @param  jbd_fs jbd filesystem

+ * @return standard error code (-1 with Eio set via werrstr() when

+ *         the jbd superblock does not verify)*/

+int jbd_get_fs(struct ext4_fs *fs,

+	       struct jbd_fs *jbd_fs)

+{

+	int rc;

+	u32int ino;

+	memset(jbd_fs, 0, sizeof(struct jbd_fs));

+	/* See if there is journal inode on this filesystem.*/

+	/* FIXME: detection of the existence of a journal bdev is

+	 *        missing.*/

+	ino = ext4_get32(&fs->sb, journal_inode_number);

+	rc = ext4_fs_get_inode_ref(fs, ino, &jbd_fs->inode_ref);

+	if (rc != 0)

+		return rc;

+	rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);

+	if (rc == 0 && !jbd_verify_sb(&jbd_fs->sb)) {

+		werrstr(Eio);

+		rc = -1;

+	}

+	if (rc != 0) {

+		/* Undo the reference taken above and leave jbd_fs blank. */

+		ext4_fs_put_inode_ref(&jbd_fs->inode_ref);

+		memset(jbd_fs, 0, sizeof(struct jbd_fs));

+		return rc;

+	}

+	jbd_fs->bdev = fs->bdev;

+	return 0;

+}

+/**@brief  Put reference of jbd filesystem.

+ *         Writes back the jbd superblock if dirty, then drops the

+ *         journal i-node reference whether or not the write worked.

+ * @param  jbd_fs jbd filesystem

+ * @return result of the superblock write-back*/

+int jbd_put_fs(struct jbd_fs *jbd_fs)

+{

+	int wrc = jbd_write_sb(jbd_fs);

+	ext4_fs_put_inode_ref(&jbd_fs->inode_ref);

+	return wrc;

+}

+/**@brief  Data block lookup helper.

+ *         Thin wrapper around ext4_fs_get_inode_dblk_idx() on the

+ *         journal i-node, with its last argument fixed to false.

+ * @param  jbd_fs jbd filesystem

+ * @param  iblock block index

+ * @param  fblock logical block address

+ * @return standard error code*/

+int jbd_inode_bmap(struct jbd_fs *jbd_fs,

+		   ext4_lblk_t iblock,

+		   ext4_fsblk_t *fblock)

+{

+	return ext4_fs_get_inode_dblk_idx(&jbd_fs->inode_ref, iblock,

+					  fblock, false);

+}

+/**@brief   jbd block get function (through cache).

+ *          Maps the journal-relative block through the journal

+ *          i-node and fetches it from the block device.

+ * @param   jbd_fs jbd filesystem

+ * @param   block block descriptor

+ * @param   fblock jbd logical block address

+ * @return  standard error code*/

+static int jbd_block_get(struct jbd_fs *jbd_fs,

+		  struct ext4_block *block,

+		  ext4_fsblk_t fblock)

+{

+	/* TODO: journal device. */

+	struct ext4_blockdev *bdev = jbd_fs->bdev;

+	ext4_lblk_t iblock = (ext4_lblk_t)fblock;

+	int rc;

+	/* Lookup the logical block address of

+	 * fblock.*/

+	rc = jbd_inode_bmap(jbd_fs, iblock, &fblock);

+	if (rc != 0)

+		return rc;

+	rc = ext4_block_get(bdev, block, fblock);

+	if (rc != 0)

+		return rc;

+	/* Mark buffer as BC_FLUSH to indicate that data should

+	 * be written to disk immediately.*/

+	ext4_bcache_set_flag(block->buf, BC_FLUSH);

+	/* As we don't want to occupy too much space

+	 * in block cache, we set this buffer BC_TMP.*/

+	ext4_bcache_set_flag(block->buf, BC_TMP);

+	return 0;

+}

+/**@brief   jbd block get function (through cache, don't read).

+ *          Same mapping as jbd_block_get() but uses

+ *          ext4_block_get_noread() and only sets BC_FLUSH.

+ * @param   jbd_fs jbd filesystem

+ * @param   block block descriptor

+ * @param   fblock jbd logical block address

+ * @return  standard error code*/

+static int jbd_block_get_noread(struct jbd_fs *jbd_fs,

+			 struct ext4_block *block,

+			 ext4_fsblk_t fblock)

+{

+	/* TODO: journal device. */

+	struct ext4_blockdev *bdev = jbd_fs->bdev;

+	ext4_lblk_t iblock = (ext4_lblk_t)fblock;

+	int rc;

+	rc = jbd_inode_bmap(jbd_fs, iblock, &fblock);

+	if (rc != 0)

+		return rc;

+	rc = ext4_block_get_noread(bdev, block, fblock);

+	if (rc != 0)

+		return rc;

+	ext4_bcache_set_flag(block->buf, BC_FLUSH);

+	return 0;

+}

+/**@brief   jbd block set procedure (through cache).

+ *          Hands the block back to the journal's block device.

+ * @param   jbd_fs jbd filesystem

+ * @param   block block descriptor

+ * @return  standard error code*/

+static int jbd_block_set(struct jbd_fs *jbd_fs,

+		  struct ext4_block *block)

+{

+	return ext4_block_set(jbd_fs->bdev, block);

+}

+/**@brief  Helper function to calculate the

+ *         block tag size, not including UUID part.

+ * @param  jbd_fs jbd filesystem

+ * @return tag size in bytes*/

+static int jbd_tag_bytes(struct jbd_fs *jbd_fs)

+{

+	int nbytes;

+	/* With JBD_FEATURE_INCOMPAT_CSUM_V3 the tag has a fixed

+	 * layout.*/

+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+				     JBD_FEATURE_INCOMPAT_CSUM_V3))

+		return sizeof(struct jbd_block_tag3);

+	nbytes = sizeof(struct jbd_block_tag);

+	/* JBD_FEATURE_INCOMPAT_CSUM_V2 adds 2 bytes.*/

+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+				     JBD_FEATURE_INCOMPAT_CSUM_V2))

+		nbytes += sizeof(u16int);

+	/* If block number is 4 bytes in size,

+	 * minus 4 bytes from size */

+	if (!JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+				      JBD_FEATURE_INCOMPAT_64BIT))

+		nbytes -= sizeof(u32int);

+	return nbytes;

+}

+/**@brief  Tag information. */

+struct tag_info {

+	/**@brief  Tag size in bytes, including UUID part.*/

+	int tag_bytes;

+	/**@brief  Block number stored in this tag.*/

+	ext4_fsblk_t block;

+	/**@brief  Are the first 4 bytes of the block equal to

+	 *	   JBD_MAGIC_NUMBER? */

+	bool is_escape;

+	/**@brief  Whether the UUID part exists or not.*/

+	bool uuid_exist;

+	/**@brief  UUID content if the UUID part exists.*/

+	u8int uuid[UUID_SIZE];

+	/**@brief  Is this the last tag? */

+	bool last_tag;

+	/**@brief  Checksum of the logged block; written into the

+	 *         tag by jbd_write_block_tag(). */

+	u32int checksum;

+};

+/**@brief  Extract information from a block tag.

+ *         The tag is read as a jbd_block_tag3 when CSUM_V3 is

+ *         enabled and as a jbd_block_tag otherwise; the checksum

+ *         stored in the tag is not extracted.

+ * @param  jbd_fs jbd filesystem

+ * @param  __tag pointer to the block tag

+ * @param  tag_bytes block tag size of this jbd filesystem

+ * @param  remain_buf_size remaining size in buffer containing

+ *         the block tag

+ * @param  tag_info information of this tag.

+ * @return 0 on success, otherwise -1 with Einval set via

+ *         werrstr().*/

+static int

+jbd_extract_block_tag(struct jbd_fs *jbd_fs,

+		      void *__tag,

+		      int tag_bytes,

+		      s32int remain_buf_size,

+		      struct tag_info *tag_info)

+{

+	u32int tflags;

+	tag_info->tag_bytes = tag_bytes;

+	tag_info->uuid_exist = false;

+	tag_info->last_tag = false;

+	tag_info->is_escape = false;

+	/* See whether it is possible to hold a valid block tag.*/

+	if (remain_buf_size - tag_bytes < 0) {

+		werrstr(Einval);

+		return -1;

+	}

+	/* Only the tag layout differs between the two variants; the

+	 * flag handling below is common. */

+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+				     JBD_FEATURE_INCOMPAT_CSUM_V3)) {

+		struct jbd_block_tag3 *tag3 = __tag;

+		tag_info->block = jbd_get32(tag3, blocknr);

+		if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+					     JBD_FEATURE_INCOMPAT_64BIT))

+			tag_info->block |=

+				(u64int)jbd_get32(tag3, blocknr_high) << 32;

+		tflags = jbd_get32(tag3, flags);

+	} else {

+		struct jbd_block_tag *tag = __tag;

+		tag_info->block = jbd_get32(tag, blocknr);

+		if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+					     JBD_FEATURE_INCOMPAT_64BIT))

+			tag_info->block |=

+				(u64int)jbd_get32(tag, blocknr_high) << 32;

+		tflags = jbd_get16(tag, flags);

+	}

+	if (tflags & JBD_FLAG_ESCAPE)

+		tag_info->is_escape = true;

+	if (!(tflags & JBD_FLAG_SAME_UUID)) {

+		/* See whether it is possible to hold UUID part.*/

+		if (remain_buf_size - tag_bytes < UUID_SIZE) {

+			werrstr(Einval);

+			return -1;

+		}

+		/* The UUID directly follows the fixed part of the tag. */

+		tag_info->uuid_exist = true;

+		tag_info->tag_bytes += UUID_SIZE;

+		memcpy(tag_info->uuid, (char *)__tag + tag_bytes, UUID_SIZE);

+	}

+	if (tflags & JBD_FLAG_LAST_TAG)

+		tag_info->last_tag = true;

+	return 0;

+}

+/**@brief  Write information to a block tag.

+ *         The tag is laid out as a jbd_block_tag3 when CSUM_V3 is

+ *         enabled and as a jbd_block_tag otherwise. tag_info's

+ *         tag_bytes is updated to the number of bytes consumed,

+ *         including the UUID part when one is written.

+ * @param  jbd_fs jbd filesystem

+ * @param  __tag pointer to the block tag

+ * @param  remain_buf_size remaining size in buffer containing

+ *         the block tag

+ * @param  tag_info information of this tag.

+ * @return 0 on success, otherwise -1 with Einval set via

+ *         werrstr().*/

+static int

+jbd_write_block_tag(struct jbd_fs *jbd_fs,

+		    void *__tag,

+		    s32int remain_buf_size,

+		    struct tag_info *tag_info)

+{

+	int base_bytes = jbd_tag_bytes(jbd_fs);

+	char *uuid_dst = (char *)__tag + base_bytes;

+	u32int f;

+	tag_info->tag_bytes = base_bytes;

+	/* See whether it is possible to hold a valid block tag.*/

+	if (remain_buf_size - base_bytes < 0) {

+		werrstr(Einval);

+		return -1;

+	}

+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+				     JBD_FEATURE_INCOMPAT_CSUM_V3)) {

+		struct jbd_block_tag3 *tag3 = __tag;

+		memset(tag3, 0, sizeof(struct jbd_block_tag3));

+		jbd_set32(tag3, blocknr, (u32int)tag_info->block);

+		if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+					     JBD_FEATURE_INCOMPAT_64BIT))

+			jbd_set32(tag3, blocknr_high, tag_info->block >> 32);

+		if (!tag_info->uuid_exist) {

+			jbd_set32(tag3, flags,

+				  jbd_get32(tag3, flags) | JBD_FLAG_SAME_UUID);

+		} else {

+			/* See whether it is possible to hold UUID part.*/

+			if (remain_buf_size - base_bytes < UUID_SIZE) {

+				werrstr(Einval);

+				return -1;

+			}

+			tag_info->tag_bytes += UUID_SIZE;

+			memcpy(uuid_dst, tag_info->uuid, UUID_SIZE);

+		}

+		jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);

+		/* Fold the remaining flags in with a single update. */

+		f = jbd_get32(tag3, flags);

+		if (tag_info->last_tag)

+			f |= JBD_FLAG_LAST_TAG;

+		if (tag_info->is_escape)

+			f |= JBD_FLAG_ESCAPE;

+		jbd_set32(tag3, flags, f);

+	} else {

+		struct jbd_block_tag *tag = __tag;

+		memset(tag, 0, sizeof(struct jbd_block_tag));

+		jbd_set32(tag, blocknr, (u32int)tag_info->block);

+		if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+					     JBD_FEATURE_INCOMPAT_64BIT))

+			jbd_set32(tag, blocknr_high, tag_info->block >> 32);

+		if (!tag_info->uuid_exist) {

+			jbd_set16(tag, flags,

+				  jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);

+		} else {

+			/* See whether it is possible to hold UUID part.*/

+			if (remain_buf_size - base_bytes < UUID_SIZE) {

+				werrstr(Einval);

+				return -1;

+			}

+			tag_info->tag_bytes += UUID_SIZE;

+			memcpy(uuid_dst, tag_info->uuid, UUID_SIZE);

+		}

+		jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);

+		/* Fold the remaining flags in with a single update. */

+		f = jbd_get16(tag, flags);

+		if (tag_info->last_tag)

+			f |= JBD_FLAG_LAST_TAG;

+		if (tag_info->is_escape)

+			f |= JBD_FLAG_ESCAPE;

+		jbd_set16(tag, flags, f);

+	}

+	return 0;

+}

+/**@brief  Iterate all block tags in a block.

+ *         Iteration ends when the table is used up, when a tag

+ *         cannot be extracted, or after the tag flagged as last.

+ * @param  jbd_fs jbd filesystem

+ * @param  __tag_start pointer to the block

+ * @param  tag_tbl_size size of the block

+ * @param  func callback routine to indicate that

+ *         a block tag is found (may be nil)

+ * @param  arg additional argument to be passed to func */

+static void

+jbd_iterate_block_table(struct jbd_fs *jbd_fs,

+			void *__tag_start,

+			s32int tag_tbl_size,

+			void (*func)(struct jbd_fs * jbd_fs,

+				     struct tag_info *tag_info,

+				     void *arg),

+			void *arg)

+{

+	int tag_bytes = jbd_tag_bytes(jbd_fs);

+	char *cursor = __tag_start;

+	struct tag_info tag_info;

+	/* Cut off the size of block tail storing checksum. */

+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+				     JBD_FEATURE_INCOMPAT_CSUM_V2) ||

+	    JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+				     JBD_FEATURE_INCOMPAT_CSUM_V3))

+		tag_tbl_size -= sizeof(struct jbd_block_tail);

+	for (; tag_tbl_size;

+	     cursor += tag_info.tag_bytes,

+	     tag_tbl_size -= tag_info.tag_bytes) {

+		if (jbd_extract_block_tag(jbd_fs, cursor, tag_bytes,

+					  tag_tbl_size, &tag_info) != 0)

+			break;

+		if (func)

+			func(jbd_fs, &tag_info, arg);

+		/* Stop the iteration when we reach the last tag. */

+		if (tag_info.last_tag)

+			break;

+	}

+}

+/**@brief  Tag callback that logs the tagged block number and

+ *         advances the caller's journal block cursor, wrapping

+ *         around the log when needed.

+ * @param  jbd_fs jbd filesystem

+ * @param  tag_info tag being visited

+ * @param  arg pointer to the u32int block cursor */

+static void jbd_display_block_tags(struct jbd_fs *jbd_fs,

+				   struct tag_info *tag_info,

+				   void *arg)

+{

+	u32int *cursor = arg;

+	USED(tag_info);

+	ext4_dbg(DEBUG_JBD, "Block in block_tag: %llud\n", tag_info->block);

+	*cursor += 1;

+	wrap(&jbd_fs->sb, *cursor);

+	(void)jbd_fs;

+}

+/**@brief  Look up the revoke entry for a block.

+ * @param  info  journal replay information holding the revoke tree

+ * @param  block block number to search for

+ * @return result of RB_FIND on the revoke tree for that block */

+static struct revoke_entry *

+jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)

+{

+	struct revoke_entry key = {

+		.block = block

+	};

+	struct revoke_entry *hit;

+	hit = RB_FIND(jbd_revoke, &info->revoke_root, &key);

+	return hit;

+}

+/**@brief  Replay a block in a transaction.

+ *         Has the callback signature taken by

+ *         jbd_iterate_block_table(). Advances the journal block

+ *         cursor, then copies the logged block to its home

+ *         location unless it is revoked. Block 0 is treated as

+ *         the ext4 superblock and written through ext4_sb_write().

+ *         Errors are not reported; the tag is simply skipped.

+ * @param  jbd_fs jbd filesystem

+ * @param  tag_info tag_info of the logged block.

+ * @param  __arg pointer to a struct replay_arg */

+static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,

+				  struct tag_info *tag_info,

+				  void *__arg)

+{

+	int r;

+	struct replay_arg *arg = __arg;

+	struct recover_info *info = arg->info;

+	u32int *this_block = arg->this_block;

+	struct revoke_entry *revoke_entry;

+	struct ext4_block journal_block, ext4_block;

+	struct ext4_fs *fs = jbd_fs->inode_ref.fs;

+	(*this_block)++;

+	wrap(&jbd_fs->sb, *this_block);

+	/* We replay this block only if the current transaction id

+	 * is strictly greater than that in the revoke entry.*/

+	revoke_entry = jbd_revoke_entry_lookup(info, tag_info->block);

+	if (revoke_entry &&

+	    trans_id_diff(arg->this_trans_id, revoke_entry->trans_id) <= 0)

+		return;

+	ext4_dbg(DEBUG_JBD,

+		 "Replaying block in block_tag: %llud\n",

+		 tag_info->block);

+	r = jbd_block_get(jbd_fs, &journal_block, *this_block);

+	if (r != 0)

+		return;

+	/* We need special treatment for ext4 superblock. */

+	if (tag_info->block) {

+		r = ext4_block_get_noread(fs->bdev, &ext4_block, tag_info->block);

+		if (r != 0) {

+			jbd_block_set(jbd_fs, &journal_block);

+			return;

+		}

+		memcpy(ext4_block.data,

+			journal_block.data,

+			jbd_get32(&jbd_fs->sb, blocksize));

+		/* An escaped block had its leading magic number

+		 * replaced in the log; put it back. */

+		if (tag_info->is_escape)

+			((struct jbd_bhdr *)ext4_block.data)->magic =

+					to_be32(JBD_MAGIC_NUMBER);

+		ext4_bcache_set_dirty(ext4_block.buf);

+		ext4_block_set(fs->bdev, &ext4_block);

+	} else {

+		u16int mount_count, state;

+		/* Keep the in-memory state and mount count across the

+		 * overwrite of the superblock copy. */

+		mount_count = ext4_get16(&fs->sb, mount_count);

+		state = ext4_get16(&fs->sb, state);

+		memcpy(&fs->sb,

+			journal_block.data + EXT4_SUPERBLOCK_OFFSET,

+			EXT4_SUPERBLOCK_SIZE);

+		/* Mark system as mounted */

+		ext4_set16(&fs->sb, state, state);

+		r = ext4_sb_write(fs->bdev, &fs->sb);

+		/*Update mount count only after a successful write*/

+		if (r == 0)

+			ext4_set16(&fs->sb, mount_count, mount_count);

+	}

+	/* Always release the journal block, including when the

+	 * superblock write above failed. */

+	jbd_block_set(jbd_fs, &journal_block);

+	return;

+}

+/**@brief  Record a revoked block address in the revoke tree,

+ *         stamped with the transaction id currently in @info.

+ *         An address that is already present only gets its

+ *         transaction id refreshed.

+ * @param  info  journal replay info

+ * @param  block  block address to be revoked.*/

+static void jbd_add_revoke_block_tags(struct recover_info *info,

+				      ext4_fsblk_t block)

+{

+	struct revoke_entry *entry;

+	ext4_dbg(DEBUG_JBD, "Add block %llud to revoke tree\n", block);

+	entry = jbd_revoke_entry_lookup(info, block);

+	if (entry == nil) {

+		/* First revoke seen for this address: create its entry. */

+		entry = jbd_alloc_revoke_entry();

+		assert(entry);

+		entry->block = block;

+		entry->trans_id = info->this_trans_id;

+		RB_INSERT(jbd_revoke, &info->revoke_root, entry);

+		return;

+	}

+	/* Known address: only the transaction id changes. */

+	entry->trans_id = info->this_trans_id;

+}

+/* Empty the revoke tree of @info: repeatedly unlink the minimum entry

+ * and hand it to jbd_free_revoke_entry(). */

+static void jbd_destroy_revoke_tree(struct recover_info *info)

+{

+	struct revoke_entry *victim;

+	for (;;) {

+		if (RB_EMPTY(&info->revoke_root))

+			break;

+		victim = RB_MIN(jbd_revoke, &info->revoke_root);

+		assert(victim);

+		RB_REMOVE(jbd_revoke, &info->revoke_root, victim);

+		jbd_free_revoke_entry(victim);

+	}

+}

+#define ACTION_SCAN 0

+#define ACTION_REVOKE 1

+#define ACTION_RECOVER 2

+/**@brief  Add entries in a revoke block to revoke tree.

+ *         The header's count field gives the number of bytes in use,

+ *         header included; the records that follow the header are

+ *         4 bytes each, or 8 bytes with the 64BIT feature.

+ *         A count smaller than the header or larger than the journal

+ *         block size is rejected and the block is ignored.

+ * @param  jbd_fs jbd filesystem

+ * @param  header revoke block header

+ * @param  info  journal replay info*/

+static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,

+				  struct jbd_bhdr *header,

+				  struct recover_info *info)

+{

+	char *blocks_entry;

+	struct jbd_revoke_header *revoke_hdr =

+		(struct jbd_revoke_header *)header;

+	u32int i, nr_entries, record_len = 4;

+	u32int count = jbd_get32(revoke_hdr, count);

+	u32int blocksize = jbd_get32(&jbd_fs->sb, blocksize);

+	/* If we are working on a 64bit jbd filesystem, */

+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,

+				     JBD_FEATURE_INCOMPAT_64BIT))

+		record_len = 8;

+	/* count comes from disk. Below the header size the unsigned

+	 * subtraction would wrap around, above the block size the loop

+	 * would read past the end of the block. */

+	if (count < sizeof(struct jbd_revoke_header) || count > blocksize) {

+		ext4_dbg(DEBUG_JBD,

+			DBG_WARN "Revoke block has invalid count: %ud\n",

+			count);

+		return;

+	}

+	nr_entries = (count - sizeof(struct jbd_revoke_header)) /

+			record_len;

+	blocks_entry = (char *)(revoke_hdr + 1);

+	for (i = 0;i < nr_entries;i++) {

+		if (record_len == 8) {

+			u64int *blocks =

+				(u64int *)blocks_entry;

+			jbd_add_revoke_block_tags(info, to_be64(*blocks));

+		} else {

+			u32int *blocks =

+				(u32int *)blocks_entry;

+			jbd_add_revoke_block_tags(info, to_be32(*blocks));

+		}

+		blocks_entry += record_len;

+	}

+}

+/* Walk the tags of a descriptor block with jbd_display_block_tags,

+ * which logs each tag and advances *iblock. The tag area is what

+ * follows the block header inside one journal block. */

+static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,

+				       struct jbd_bhdr *header,

+				       u32int *iblock)

+{

+	void *first_tag = header + 1;

+	s32int tbl_bytes = jbd_get32(&jbd_fs->sb, blocksize) -

+				sizeof(struct jbd_bhdr);

+	jbd_iterate_block_table(jbd_fs, first_tag, tbl_bytes,

+				jbd_display_block_tags, iblock);

+}

+/* Walk the tags of a descriptor block with jbd_replay_block_tags,

+ * passing @arg through to it. The tag area is what follows the block

+ * header inside one journal block. */

+static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,

+					struct jbd_bhdr *header,

+					struct replay_arg *arg)

+{

+	void *first_tag = header + 1;

+	s32int tbl_bytes = jbd_get32(&jbd_fs->sb, blocksize) -

+				sizeof(struct jbd_bhdr);

+	jbd_iterate_block_table(jbd_fs, first_tag, tbl_bytes,

+				jbd_replay_block_tags, arg);

+}

+/**@brief  The core routine of journal replay.

+ *         Walks the journal from the start block and sequence number

+ *         stored in the journal superblock and dispatches on the type

+ *         of every block found. ACTION_SCAN counts commit blocks in

+ *         info->trans_cnt and records the first and last transaction

+ *         ids; ACTION_REVOKE passes revoke blocks to

+ *         jbd_build_revoke_tree(); ACTION_RECOVER replays descriptor

+ *         blocks. Passes other than ACTION_RECOVER still walk the

+ *         descriptor tags so that the block index skips the data.

+ * @param  jbd_fs jbd filesystem

+ * @param  info  journal replay info

+ * @param  action action needed to be taken (one of ACTION_*)

+ * @return 0 on success; the result of jbd_block_get() if a journal

+ *         block cannot be fetched; -1 with Eio set if a pass other

+ *         than ACTION_SCAN meets an unexpected transaction id*/

+static int jbd_iterate_log(struct jbd_fs *jbd_fs,

+			   struct recover_info *info,

+			   int action)

+{

+	int r = 0;

+	bool log_end = false;

+	struct jbd_sb *sb = &jbd_fs->sb;

+	u32int start_trans_id, this_trans_id;

+	u32int start_block, this_block;

+	/* We start iterating valid blocks in the whole journal.*/

+	start_trans_id = this_trans_id = jbd_get32(sb, sequence);

+	start_block = this_block = jbd_get32(sb, start);

+	if (action == ACTION_SCAN)

+		info->trans_cnt = 0;

+	else if (!info->trans_cnt)

+		log_end = true;	/* no transaction was counted: skip the walk */

+	ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %ud\n",

+			    start_trans_id);

+	while (!log_end) {

+		struct ext4_block block;

+		struct jbd_bhdr *header;

+		/* If we are not scanning for the last

+		 * valid transaction in the journal,

+		 * we will stop when we reach the end of

+		 * the journal.*/

+		if (action != ACTION_SCAN)

+			if (trans_id_diff(this_trans_id, info->last_trans_id) > 0) {

+				log_end = true;

+				continue;

+			}

+		r = jbd_block_get(jbd_fs, &block, this_block);

+		if (r != 0)

+			break;

+		header = (struct jbd_bhdr *)block.data;

+		/* This block does not have a valid magic number,

+		 * so we have reached the end of the journal.*/

+		if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {

+			jbd_block_set(jbd_fs, &block);

+			log_end = true;

+			continue;

+		}

+		/* If the transaction id we found is not expected,

+		 * we may have reached the end of the journal.

+		 *

+		 * If we are not scanning the journal, something

+		 * bad might have taken place. :-( */

+		if (jbd_get32(header, sequence) != this_trans_id) {

+			if (action != ACTION_SCAN) {

+				werrstr(Eio);

+				r = -1;

+			}

+			jbd_block_set(jbd_fs, &block);

+			log_end = true;

+			continue;

+		}

+		switch (jbd_get32(header, blocktype)) {

+		case JBD_DESCRIPTOR_BLOCK:

+			if (!jbd_verify_meta_csum(jbd_fs, header)) {

+				ext4_dbg(DEBUG_JBD,

+					DBG_WARN "Descriptor block checksum failed."

+						"Journal block: %ud\n",

+						this_block);

+				log_end = true;

+				break;

+			}

+			ext4_dbg(DEBUG_JBD, "Descriptor block: %ud, "

+					    "trans_id: %ud\n",

+					    this_block, this_trans_id);

+			if (action == ACTION_RECOVER) {

+				struct replay_arg replay_arg;

+				replay_arg.info = info;

+				replay_arg.this_block = &this_block;	/* the tag callback advances it */

+				replay_arg.this_trans_id = this_trans_id;

+				jbd_replay_descriptor_block(jbd_fs,

+						header, &replay_arg);

+			} else

+				jbd_debug_descriptor_block(jbd_fs,

+						header, &this_block);

+			break;

+		case JBD_COMMIT_BLOCK:

+			if (!jbd_verify_commit_csum(jbd_fs,

+					(struct jbd_commit_header *)header)) {

+				ext4_dbg(DEBUG_JBD,

+					DBG_WARN "Commit block checksum failed."

+						"Journal block: %ud\n",

+						this_block);

+				log_end = true;

+				break;

+			}

+			ext4_dbg(DEBUG_JBD, "Commit block: %ud, "

+					    "trans_id: %ud\n",

+					    this_block, this_trans_id);

+			/*

+			 * This is the end of a transaction,

+			 * we may now proceed to the next transaction.

+			 */

+			this_trans_id++;

+			if (action == ACTION_SCAN)

+				info->trans_cnt++;

+			break;

+		case JBD_REVOKE_BLOCK:

+			if (!jbd_verify_meta_csum(jbd_fs, header)) {

+				ext4_dbg(DEBUG_JBD,

+					DBG_WARN "Revoke block checksum failed."

+						"Journal block: %ud\n",

+						this_block);

+				log_end = true;

+				break;

+			}

+			ext4_dbg(DEBUG_JBD, "Revoke block: %ud, "

+					    "trans_id: %ud\n",

+					    this_block, this_trans_id);

+			if (action == ACTION_REVOKE) {

+				info->this_trans_id = this_trans_id;

+				jbd_build_revoke_tree(jbd_fs,

+						header, info);

+			}

+			break;

+		default:

+			log_end = true;	/* unknown block type ends the walk */

+			break;

+		}

+		jbd_block_set(jbd_fs, &block);

+		this_block++;

+		wrap(sb, this_block);

+		if (this_block == start_block)

+			log_end = true;	/* back at the block the walk started from */

+	}

+	ext4_dbg(DEBUG_JBD, "End of journal.\n");

+	if (r == 0 && action == ACTION_SCAN) {

+		/* We have finished scanning the journal. */

+		info->start_trans_id = start_trans_id;

+		if (trans_id_diff(this_trans_id, start_trans_id) > 0)

+			info->last_trans_id = this_trans_id - 1;	/* the id was bumped past the last commit block */

+		else

+			info->last_trans_id = this_trans_id;

+	}

+	return r;

+}

+/**@brief  Replay journal.

+ *         Runs jbd_iterate_log() three times: ACTION_SCAN to find the

+ *         range of transactions, ACTION_REVOKE to build the revoke

+ *         tree, and ACTION_RECOVER to replay. After a successful

+ *         replay the journal start is set to 0 and

+ *         EXT4_FINCOM_RECOVER is cleared in the ext4 superblock,

+ *         which is then written. Nothing is done when the journal

+ *         superblock's start field is zero.

+ * @param  jbd_fs jbd filesystem

+ * @return standard error code*/

+int jbd_recover(struct jbd_fs *jbd_fs)

+{

+	int r;

+	struct recover_info info;

+	struct jbd_sb *sb = &jbd_fs->sb;

+	if (!sb->start)

+		return 0;

+	RB_INIT(&info.revoke_root);

+	/* The scan pass adds nothing to the revoke tree, so an early

+	 * return is safe here. */

+	r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);

+	if (r != 0)

+		return r;

+	/* From here on the tree may hold entries: every path has to

+	 * reach jbd_destroy_revoke_tree() below, also on failure. */

+	r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);

+	if (r == 0)

+		r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);

+	if (r == 0) {

+		/* If we successfully replay the journal,

+		 * clear EXT4_FINCOM_RECOVER flag on the

+		 * ext4 superblock, and set the start of

+		 * journal to 0.*/

+		u32int features_incompatible =

+			ext4_get32(&jbd_fs->inode_ref.fs->sb,

+				   features_incompatible);

+		jbd_set32(&jbd_fs->sb, start, 0);

+		jbd_set32(&jbd_fs->sb, sequence, info.last_trans_id);

+		features_incompatible &= ~EXT4_FINCOM_RECOVER;

+		ext4_set32(&jbd_fs->inode_ref.fs->sb,

+			   features_incompatible,

+			   features_incompatible);

+		jbd_fs->dirty = true;

+		r = ext4_sb_write(jbd_fs->bdev,

+				  &jbd_fs->inode_ref.fs->sb);

+	}

+	jbd_destroy_revoke_tree(&info);

+	return r;

+}

+/* Copy the session's start block and transaction id into the

+ * in-memory journal superblock and flag it dirty. Nothing is

+ * written to disk here. */

+static void jbd_journal_write_sb(struct jbd_journal *journal)

+{

+	struct jbd_sb *jsb = &journal->jbd_fs->sb;

+	jbd_set32(jsb, start, journal->start);

+	jbd_set32(jsb, sequence, journal->trans_id);

+	journal->jbd_fs->dirty = true;

+}

+/**@brief  Start accessing the journal.

+ *         Sets EXT4_FINCOM_RECOVER in the ext4 superblock and writes

+ *         it, initialises the session fields of @journal from the

+ *         journal superblock, writes the journal superblock, and

+ *         finally attaches @journal to the block device.

+ * @param  jbd_fs jbd filesystem

+ * @param  journal current journal session

+ * @return standard error code (0, or the result of the superblock

+ *         write that failed)*/

+int jbd_journal_start(struct jbd_fs *jbd_fs,

+		      struct jbd_journal *journal)

+{

+	int r;

+	u32int features_incompatible =

+			ext4_get32(&jbd_fs->inode_ref.fs->sb,

+				   features_incompatible);

+	features_incompatible |= EXT4_FINCOM_RECOVER;

+	ext4_set32(&jbd_fs->inode_ref.fs->sb,

+			features_incompatible,

+			features_incompatible);

+	r = ext4_sb_write(jbd_fs->bdev,

+			&jbd_fs->inode_ref.fs->sb);

+	if (r != 0)

+		return r;

+	journal->first = jbd_get32(&jbd_fs->sb, first);

+	journal->start = journal->first;	/* the session begins with start == last == first */

+	journal->last = journal->first;

+	/*

+	 * To invalidate any stale records we need to start from

+	 * the checkpoint transaction ID of the previous journalling session

+	 * plus 1.

+	 */

+	journal->trans_id = jbd_get32(&jbd_fs->sb, sequence) + 1;

+	journal->alloc_trans_id = journal->trans_id;

+	journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);

+	TAILQ_INIT(&journal->cp_queue);

+	RB_INIT(&journal->block_rec_root);

+	journal->jbd_fs = jbd_fs;

+	jbd_journal_write_sb(journal);	/* updates the in-memory journal superblock only */

+	r = jbd_write_sb(jbd_fs);

+	if (r != 0)

+		return r;

+	jbd_fs->bdev->journal = journal;	/* attached only after both superblocks were written */

+	return 0;

+}

+static void jbd_trans_end_write(struct ext4_bcache *bc,

+			  struct ext4_buf *buf,

+			  int res,

+			  void *arg);

+/*

+ * Push every buffer queued on @trans to its final location.

+ * For each queued buffer the block cache is asked for the block.

+ * If it holds an up-to-date buffer whose block record still belongs

+ * to @trans, that buffer is flushed. Otherwise the journalled copy

+ * (at jbd_buf->jbd_lba) is read and written directly to the block's

+ * address, and jbd_trans_end_write() is called with the result.

+ * Allocation and journal read failures trip an assert.

+ *

+ * This routine is only suitable to committed transactions. */

+static void jbd_journal_flush_trans(struct jbd_trans *trans)

+{

+	struct jbd_buf *jbd_buf, *tmp;

+	struct jbd_journal *journal = trans->journal;

+	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;

+	void *tmp_data = ext4_malloc(journal->block_size);	/* bounce buffer for one journal block */

+	assert(tmp_data);

+	TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {

+		struct ext4_buf *buf;

+		struct ext4_block block;

+		/* The buffer is not yet flushed. */

+		buf = ext4_bcache_find_get(fs->bdev->bc, &block,

+					   jbd_buf->block_rec->lba);

+		if (!(buf && ext4_bcache_test_flag(buf, BC_UPTODATE) &&

+		      jbd_buf->block_rec->trans == trans)) {

+			int r;

+			struct ext4_block jbd_block = EXT4_BLOCK_ZERO();

+			r = jbd_block_get(journal->jbd_fs,

+						&jbd_block,

+						jbd_buf->jbd_lba);

+			assert(r == 0);

+			assert(jbd_block.data != nil);

+			memcpy(tmp_data, jbd_block.data,

+					journal->block_size);

+			ext4_block_set(fs->bdev, &jbd_block);

+			r = ext4_blocks_set_direct(fs->bdev, tmp_data,

+					jbd_buf->block_rec->lba, 1);

+			jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);	/* buf may be nil here */

+		} else

+			ext4_block_flush_buf(fs->bdev, buf);

+		if (buf)

+			ext4_block_set(fs->bdev, &block);	/* drop the reference taken by the lookup */

+	}

+	ext4_free(tmp_data);

+}

+/* Retire @trans without flushing anything: move the journal's

+ * transaction id and start block past it, free it, and refresh the

+ * in-memory journal superblock. The caller has already unlinked

+ * @trans from the checkpoint queue. */

+static void

+jbd_journal_skip_pure_revoke(struct jbd_journal *journal,

+			     struct jbd_trans *trans)

+{

+	journal->trans_id = trans->trans_id + 1;

+	journal->start = trans->start_iblock + trans->alloc_blocks;

+	wrap(&journal->jbd_fs->sb, journal->start);

+	jbd_journal_free_trans(journal, trans, false);

+	jbd_journal_write_sb(journal);

+}

+/* Work through the checkpoint queue from its head.

+ *  - A transaction without data blocks is unlinked and skipped.

+ *  - A transaction whose data blocks are all written is unlinked

+ *    and freed, and the journal start moves past it.

+ *  - Otherwise, with @flush set the transaction is flushed and

+ *    looked at again; without it the journal start is set to the

+ *    beginning of that transaction and the walk ends.

+ * With @once set, at most one pass over the head is made. */

+void

+jbd_journal_purge_cp_trans(struct jbd_journal *journal,

+			   bool flush,

+			   bool once)

+{

+	struct jbd_trans *head;

+	for (;;) {

+		head = TAILQ_FIRST(&journal->cp_queue);

+		if (head == nil)

+			break;

+		if (!head->data_cnt) {

+			TAILQ_REMOVE(&journal->cp_queue, head, trans_node);

+			jbd_journal_skip_pure_revoke(journal, head);

+		} else if (head->data_cnt == head->written_cnt) {

+			journal->start = head->start_iblock +

+				head->alloc_blocks;

+			wrap(&journal->jbd_fs->sb, journal->start);

+			journal->trans_id = head->trans_id + 1;

+			TAILQ_REMOVE(&journal->cp_queue, head, trans_node);

+			jbd_journal_free_trans(journal, head, false);

+			jbd_journal_write_sb(journal);

+		} else if (flush) {

+			jbd_journal_flush_trans(head);

+		} else {

+			journal->start = head->start_iblock;

+			wrap(&journal->jbd_fs->sb, journal->start);

+			journal->trans_id = head->trans_id;

+			jbd_journal_write_sb(journal);

+			break;

+		}

+		if (once)

+			break;

+	}

+}

+/**@brief  Stop accessing the journal.

+ *         Purges the checkpoint queue with flushing enabled, clears

+ *         EXT4_FINCOM_RECOVER in the ext4 superblock and writes it,

+ *         then resets the journal start and transaction id to 0 and

+ *         writes the journal superblock.

+ * @param  journal current journal session

+ * @return standard error code (0, or the result of the superblock

+ *         write that failed)*/

+int jbd_journal_stop(struct jbd_journal *journal)

+{

+	int r;

+	struct jbd_fs *jbd_fs = journal->jbd_fs;

+	u32int features_incompatible;

+	/* Make sure that journalled content have reached

+	 * the disk.*/

+	jbd_journal_purge_cp_trans(journal, true, false);

+	/* There should be no block record in this journal

+	 * session. Leftovers are only reported, not treated

+	 * as an error. */

+	if (!RB_EMPTY(&journal->block_rec_root))

+		ext4_dbg(DEBUG_JBD,

+			 DBG_WARN "There are still block records "

+			 	  "in this journal session!\n");

+	features_incompatible =

+		ext4_get32(&jbd_fs->inode_ref.fs->sb,

+			   features_incompatible);

+	features_incompatible &= ~EXT4_FINCOM_RECOVER;

+	ext4_set32(&jbd_fs->inode_ref.fs->sb,

+			features_incompatible,

+			features_incompatible);

+	r = ext4_sb_write(jbd_fs->bdev,

+			&jbd_fs->inode_ref.fs->sb);

+	if (r != 0)

+		return r;

+	journal->start = 0;	/* jbd_recover() does nothing when start is 0 */

+	journal->trans_id = 0;

+	jbd_journal_write_sb(journal);

+	return jbd_write_sb(journal->jbd_fs);

+}

+/**@brief  Hand out the next free block of the journal.

+ *         The block at journal->last is taken, journal->last moves

+ *         on (wrapping) and the transaction's block count grows by

+ *         one. If that makes last meet start, one checkpointed

+ *         transaction is flushed to make room; still being full

+ *         afterwards trips an assert.

+ * @param  journal current journal session

+ * @param  trans transaction

+ * @return allocated block address*/

+static u32int jbd_journal_alloc_block(struct jbd_journal *journal,

+					struct jbd_trans *trans)

+{

+	u32int allocated = journal->last;

+	journal->last++;

+	trans->alloc_blocks++;

+	wrap(&journal->jbd_fs->sb, journal->last);

+	if (journal->last != journal->start)

+		return allocated;

+	/* No space left: flush just one journalled transaction. */

+	jbd_journal_purge_cp_trans(journal, true, true);

+	assert(journal->last != journal->start);

+	return allocated;

+}

+/* Look up the block record kept for @lba in the journal session.

+ * Returns whatever RB_FIND yields for a key carrying that address. */

+static struct jbd_block_rec *

+jbd_trans_block_rec_lookup(struct jbd_journal *journal,

+			   ext4_fsblk_t lba)

+{

+	struct jbd_block_rec key = { .lba = lba };

+	struct jbd_block_rec *found;

+	found = RB_FIND(jbd_block, &journal->block_rec_root, &key);

+	return found;

+}

+/* Move @block_rec from the record list it is on to the list of

+ * @new_trans. With a nil @new_trans the record is only unlinked and

+ * left without an owner. */

+static void

+jbd_trans_change_ownership(struct jbd_block_rec *block_rec,

+			   struct jbd_trans *new_trans)

+{

+	LIST_REMOVE(block_rec, tbrec_node);

+	block_rec->trans = new_trans;

+	if (new_trans == nil)

+		return;

+	/* Now this block record belongs to this transaction. */

+	LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);

+}

+/* Make @trans the owner of the block record for @lba. An existing

+ * record is moved over to @trans; otherwise a new one is allocated

+ * and linked into both the transaction's list and the journal's

+ * tree. Returns the record, or nil if the allocation fails. */

+static inline struct jbd_block_rec *

+jbd_trans_insert_block_rec(struct jbd_trans *trans,

+			   ext4_fsblk_t lba)

+{

+	struct jbd_block_rec *rec;

+	rec = jbd_trans_block_rec_lookup(trans->journal, lba);

+	if (rec != nil) {

+		jbd_trans_change_ownership(rec, trans);

+		return rec;

+	}

+	rec = ext4_calloc(1, sizeof(*rec));

+	if (rec == nil)

+		return nil;

+	rec->lba = lba;

+	rec->trans = trans;

+	TAILQ_INIT(&rec->dirty_buf_queue);

+	LIST_INSERT_HEAD(&trans->tbrec_list, rec, tbrec_node);

+	RB_INSERT(jbd_block, &trans->journal->block_rec_root, rec);

+	return rec;

+}

+/*

+ * This routine will do the dirty works: it settles a block record

+ * when one of its buffers leaves @trans. Nothing happens unless the

+ * record is currently owned by @trans.

+ *

+ * !abort: jbd_trans_end_write() is called, with a nil buffer and

+ * result 0, for every buffer still on the record's dirty queue.

+ *

+ * abort: if the dirty queue still holds a buffer, ownership of the

+ * record moves to the transaction of the last queued buffer. Unless

+ * @revoke is set, the journalled copy of that buffer is also copied

+ * back into the block's cache buffer, which is then marked dirty

+ * with jbd_trans_end_write as its end-of-write hook.

+ */

+static void

+jbd_trans_finish_callback(struct jbd_journal *journal,

+			  const struct jbd_trans *trans,

+			  struct jbd_block_rec *block_rec,

+			  bool abort,

+			  bool revoke)

+{

+	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;

+	if (block_rec->trans != trans)

+		return;

+	if (!abort) {

+		struct jbd_buf *jbd_buf, *tmp;

+		TAILQ_FOREACH_SAFE(jbd_buf,

+				&block_rec->dirty_buf_queue,

+				dirty_buf_node,

+				tmp) {

+			jbd_trans_end_write(fs->bdev->bc, nil, 0, jbd_buf);

+		}

+	} else {

+		/*

+		 * We have to roll back data if the block is going to be

+		 * aborted.

+		 */

+		struct jbd_buf *jbd_buf;

+		struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),

+				  block = EXT4_BLOCK_ZERO();

+		jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,

+				jbd_buf_dirty);

+		if (jbd_buf) {

+			if (!revoke) {

+				int r;

+				r = ext4_block_get_noread(fs->bdev,

+							&block,

+							block_rec->lba);

+				assert(r == 0);

+				r = jbd_block_get(journal->jbd_fs,

+							&jbd_block,

+							jbd_buf->jbd_lba);

+				assert(r == 0);

+				memcpy(block.data, jbd_block.data,

+						journal->block_size);	/* restore the journalled content */

+				jbd_trans_change_ownership(block_rec,

+						jbd_buf->trans);

+				block.buf->end_write = jbd_trans_end_write;

+				block.buf->end_write_arg = jbd_buf;

+				ext4_bcache_set_flag(jbd_block.buf, BC_TMP);

+				ext4_bcache_set_dirty(block.buf);

+				ext4_block_set(fs->bdev, &jbd_block);

+				ext4_block_set(fs->bdev, &block);

+				return;

+			} else {

+				/* The revoked buffer is yet written. */

+				jbd_trans_change_ownership(block_rec,

+						jbd_buf->trans);

+			}

+		}

+	}

+}

+/* Unlink and free @block_rec, provided it is owned by @trans.

+ * A record owned by any other transaction is left untouched. */

+static inline void

+jbd_trans_remove_block_rec(struct jbd_journal *journal,

+			   struct jbd_block_rec *block_rec,

+			   struct jbd_trans *trans)

+{

+	if (block_rec->trans != trans)

+		return;

+	LIST_REMOVE(block_rec, tbrec_node);

+	RB_REMOVE(jbd_block, &journal->block_rec_root, block_rec);

+	ext4_free(block_rec);

+}

+/**@brief  Add block to a transaction and mark it dirty.

+ *         A jbd_buf is created for the block, queued on the

+ *         transaction and on the block's record, and installed as

+ *         the argument of the buffer's end-of-write hook. A revoke

+ *         record for the same address in this transaction is dropped.

+ *         Nothing is done if the buffer's hook already points at a

+ *         jbd_buf of this transaction.

+ * @param  trans transaction

+ * @param  block block descriptor

+ * @return 0 on success, -1 with Enomem set if an allocation fails*/

+int jbd_trans_set_block_dirty(struct jbd_trans *trans,

+			      struct ext4_block *block)

+{

+	struct jbd_buf *jbd_buf;

+	struct jbd_revoke_rec *rec, tmp_rec = {

+		.lba = block->lb_id

+	};

+	struct jbd_block_rec *block_rec;

+	if (block->buf->end_write == jbd_trans_end_write) {

+		jbd_buf = block->buf->end_write_arg;

+		if (jbd_buf && jbd_buf->trans == trans)

+			return 0;	/* already tracked by this transaction */

+	}

+	jbd_buf = ext4_calloc(1, sizeof(struct jbd_buf));

+	if (!jbd_buf) {

+		werrstr(Enomem);

+		return -1;

+	}

+	if ((block_rec = jbd_trans_insert_block_rec(trans,

+					block->lb_id)) == nil) {

+		ext4_free(jbd_buf);

+		werrstr(Enomem);

+		return -1;

+	}

+	TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,

+			jbd_buf,

+			dirty_buf_node);

+	jbd_buf->block_rec = block_rec;

+	jbd_buf->trans = trans;

+	jbd_buf->block = *block;

+	ext4_bcache_inc_ref(block->buf);	/* for the copy of the descriptor kept in jbd_buf */

+	/* If the content reach the disk, notify us

+	 * so that we may do a checkpoint. */

+	block->buf->end_write = jbd_trans_end_write;

+	block->buf->end_write_arg = jbd_buf;

+	trans->data_cnt++;

+	TAILQ_INSERT_HEAD(&trans->buf_queue, jbd_buf, buf_node);

+	ext4_bcache_set_dirty(block->buf);

+	rec = RB_FIND(jbd_revoke_tree,

+			&trans->revoke_root,

+			&tmp_rec);

+	if (rec) {	/* the block is written again: drop its revoke record */

+		RB_REMOVE(jbd_revoke_tree, &trans->revoke_root,

+			  rec);

+		ext4_free(rec);

+	}

+	return 0;

+}

+/**@brief  Record @lba as revoked in a transaction.

+ *         An address that already has a revoke record is left as is.

+ * @param  trans transaction

+ * @param  lba logical block address

+ * @return 0 on success, -1 with Enomem set if the record cannot

+ *         be allocated*/

+int jbd_trans_revoke_block(struct jbd_trans *trans,

+			   ext4_fsblk_t lba)

+{

+	struct jbd_revoke_rec key = { .lba = lba };

+	struct jbd_revoke_rec *rec;

+	if (RB_FIND(jbd_revoke_tree, &trans->revoke_root, &key) != nil)

+		return 0;

+	rec = ext4_calloc(1, sizeof(*rec));

+	if (rec == nil) {

+		werrstr(Enomem);

+		return -1;

+	}

+	rec->lba = lba;

+	RB_INSERT(jbd_revoke_tree, &trans->revoke_root, rec);

+	return 0;

+}

+/**@brief  Try to add block to be revoked to a transaction.

+ *         If @lba still remains in an transaction on checkpoint

+ *         queue, add @lba as a revoked block to the transaction.

+ *         No revoke record is added when the journal has no block

+ *         record for @lba, or when the record belongs to @trans and

+ *         its dirty queue holds at most one buffer.

+ * @param  trans transaction

+ * @param  lba logical block address

+ * @return 0 if nothing had to be done or the block was revoked;

+ *         otherwise the error from jbd_trans_revoke_block()*/

+int jbd_trans_try_revoke_block(struct jbd_trans *trans,

+			       ext4_fsblk_t lba)

+{

+	struct jbd_journal *journal = trans->journal;

+	struct jbd_block_rec *block_rec =

+		jbd_trans_block_rec_lookup(journal, lba);

+	struct jbd_buf *last;

+	if (block_rec == nil)

+		return 0;

+	if (block_rec->trans == trans) {

+		last = TAILQ_LAST(&block_rec->dirty_buf_queue,

+				jbd_buf_dirty);

+		/* First == last: no other buffer is queued for this

+		 * block, so there is nothing to revoke. */

+		if (TAILQ_FIRST(&block_rec->dirty_buf_queue) == last)

+			return 0;

+	}

+	/* Report a failed revoke instead of claiming success: the

+	 * caller would otherwise believe the block is protected. */

+	return jbd_trans_revoke_block(trans, lba);

+}

+/**@brief  Free a transaction

+ *         Releases every jbd_buf queued on the transaction, all of

+ *         its revoke records, the block records it still owns, and

+ *         finally the transaction itself. Returns nothing.

+ * @param  journal current journal session

+ * @param  trans transaction

+ * @param  abort discard all the modifications on the block?

+ *         When set, each buffer's end-of-write hook and dirty state

+ *         are cleared and its block reference is released.*/

+void jbd_journal_free_trans(struct jbd_journal *journal,

+			    struct jbd_trans *trans,

+			    bool abort)

+{

+	struct jbd_buf *jbd_buf, *tmp;

+	struct jbd_revoke_rec *rec, *tmp2;

+	struct jbd_block_rec *block_rec, *tmp3;

+	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;

+	TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,

+			  tmp) {

+		block_rec = jbd_buf->block_rec;

+		if (abort) {

+			jbd_buf->block.buf->end_write = nil;

+			jbd_buf->block.buf->end_write_arg = nil;

+			ext4_bcache_clear_dirty(jbd_buf->block.buf);

+			ext4_block_set(fs->bdev, &jbd_buf->block);

+		}

+		TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,

+			jbd_buf,

+			dirty_buf_node);

+		jbd_trans_finish_callback(journal,

+				trans,

+				block_rec,

+				abort,

+				false);

+		TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);

+		ext4_free(jbd_buf);

+	}

+	RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,

+			  tmp2) {

+		RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, rec);

+		ext4_free(rec);

+	}

+	LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,

+			  tmp3) {

+		jbd_trans_remove_block_rec(journal, block_rec, trans);	/* frees only records still owned by trans */

+	}

+	ext4_free(trans);

+}

+/**@brief  Write commit block for a transaction

+ *         Allocates the next journal block, fills in a commit header

+ *         carrying the transaction id, sets the commit checksum and

+ *         releases the block marked dirty and BC_TMP.

+ *         NOTE(review): the data checksum fields are filled in when

+ *         JBD_FEATURE_COMPAT_CHECKSUM tests true through

+ *         JBD_HAS_INCOMPAT_FEATURE - a COMPAT-named flag checked

+ *         with the INCOMPAT macro; confirm this is intended.

+ * @param  trans transaction

+ * @return standard error code*/

+static int jbd_trans_write_commit_block(struct jbd_trans *trans)

+{

+	int rc;

+	struct ext4_block block;

+	struct jbd_commit_header *header;

+	u32int commit_iblock;

+	struct jbd_journal *journal = trans->journal;

+	commit_iblock = jbd_journal_alloc_block(journal, trans);

+	rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);

+	if (rc != 0)

+		return rc;

+	header = (struct jbd_commit_header *)block.data;

+	jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);

+	jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);

+	jbd_set32(&header->header, sequence, trans->trans_id);

+	if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,

+				JBD_FEATURE_COMPAT_CHECKSUM)) {

+		header->chksum_type = JBD_CRC32_CHKSUM;

+		header->chksum_size = JBD_CRC32_CHKSUM_SIZE;

+		jbd_set32(header, chksum[0], trans->data_csum);	/* checksum accumulated by jbd_journal_prepare() */

+	}

+	jbd_commit_csum_set(journal->jbd_fs, header);

+	ext4_bcache_set_dirty(block.buf);

+	ext4_bcache_set_flag(block.buf, BC_TMP);

+	rc = jbd_block_set(journal->jbd_fs, &block);

+	return rc;

+}

+/**@brief  Write descriptor block for a transaction

+ *         First drops the buffers of the transaction that are not

+ *         dirty. Every remaining buffer then gets a tag in the

+ *         current descriptor block and a copy of its data in a newly

+ *         allocated journal block. When a tag no longer fits, the

+ *         descriptor block is checksummed and released and a new one

+ *         is started. The running data checksum ends up in

+ *         trans->data_csum.

+ * @param  journal current journal session

+ * @param  trans transaction

+ * @return standard error code*/

+static int jbd_journal_prepare(struct jbd_journal *journal,

+			       struct jbd_trans *trans)

+{

+	int rc = 0, i = 0;

+	struct ext4_block desc_block = EXT4_BLOCK_ZERO(),

+			  data_block = EXT4_BLOCK_ZERO();

+	s32int tag_tbl_size = 0;

+	u32int desc_iblock = 0;	/* 0 means: no descriptor block is open */

+	u32int data_iblock;

+	char *tag_start, *tag_ptr = nil;

+	struct jbd_buf *jbd_buf, *tmp;

+	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;

+	u32int checksum = EXT4_CRC32_INIT;

+	struct jbd_bhdr *bhdr = nil;

+	void *data;

+	/* Try to remove any non-dirty buffers from the tail of

+	 * buf_queue. */

+	TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,

+			jbd_trans_buf, buf_node, tmp) {

+		struct jbd_revoke_rec tmp_rec = {

+			.lba = jbd_buf->block_rec->lba

+		};

+		/* We stop the iteration when we find a dirty buffer. */

+		if (ext4_bcache_test_flag(jbd_buf->block.buf,

+					BC_DIRTY))

+			break;

+		TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,

+			jbd_buf,

+			dirty_buf_node);

+		jbd_buf->block.buf->end_write = nil;

+		jbd_buf->block.buf->end_write_arg = nil;

+		jbd_trans_finish_callback(journal,

+				trans,

+				jbd_buf->block_rec,

+				true,

+				RB_FIND(jbd_revoke_tree,

+					&trans->revoke_root,

+					&tmp_rec) != nil);

+		jbd_trans_remove_block_rec(journal,

+					jbd_buf->block_rec, trans);

+		trans->data_cnt--;

+		ext4_block_set(fs->bdev, &jbd_buf->block);

+		TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);

+		ext4_free(jbd_buf);

+	}

+	TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {

+		struct tag_info tag_info;

+		bool uuid_exist = false;

+		bool is_escape = false;

+		struct jbd_revoke_rec tmp_rec = {

+			.lba = jbd_buf->block_rec->lba

+		};

+		if (!ext4_bcache_test_flag(jbd_buf->block.buf,

+					   BC_DIRTY)) {

+			TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,

+					jbd_buf,

+					dirty_buf_node);

+			jbd_buf->block.buf->end_write = nil;

+			jbd_buf->block.buf->end_write_arg = nil;

+			/* The buffer has not been modified, just release

+			 * that jbd_buf. */

+			jbd_trans_finish_callback(journal,

+					trans,

+					jbd_buf->block_rec,

+					true,

+					RB_FIND(jbd_revoke_tree,

+						&trans->revoke_root,

+						&tmp_rec) != nil);

+			jbd_trans_remove_block_rec(journal,

+					jbd_buf->block_rec, trans);

+			trans->data_cnt--;

+			ext4_block_set(fs->bdev, &jbd_buf->block);

+			TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);

+			ext4_free(jbd_buf);

+			continue;

+		}

+		checksum = jbd_block_csum(journal->jbd_fs,

+					  jbd_buf->block.data,

+					  checksum,

+					  trans->trans_id);

+		if (((struct jbd_bhdr *)jbd_buf->block.data)->magic ==

+				to_be32(JBD_MAGIC_NUMBER))

+			is_escape = true;	/* data starts with the journal magic number */

+again:

+		if (!desc_iblock) {

+			desc_iblock = jbd_journal_alloc_block(journal, trans);

+			rc = jbd_block_get_noread(journal->jbd_fs, &desc_block, desc_iblock);

+			if (rc != 0)

+				break;

+			bhdr = (struct jbd_bhdr *)desc_block.data;

+			jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);

+			jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);

+			jbd_set32(bhdr, sequence, trans->trans_id);

+			tag_start = (char *)(bhdr + 1);

+			tag_ptr = tag_start;

+			uuid_exist = true;	/* only the first tag written after opening a block carries the uuid */

+			tag_tbl_size = journal->block_size -

+				sizeof(struct jbd_bhdr);

+			if (jbd_has_csum(&journal->jbd_fs->sb))

+				tag_tbl_size -= sizeof(struct jbd_block_tail);

+			if (!trans->start_iblock)

+				trans->start_iblock = desc_iblock;

+			ext4_bcache_set_dirty(desc_block.buf);

+			ext4_bcache_set_flag(desc_block.buf, BC_TMP);

+		}

+		tag_info.block = jbd_buf->block.lb_id;

+		tag_info.uuid_exist = uuid_exist;

+		tag_info.is_escape = is_escape;

+		if (i == trans->data_cnt - 1)

+			tag_info.last_tag = true;

+		else

+			tag_info.last_tag = false;

+		tag_info.checksum = checksum;

+		if (uuid_exist)

+			memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,

+					UUID_SIZE);

+		rc = jbd_write_block_tag(journal->jbd_fs,

+				tag_ptr,

+				tag_tbl_size,

+				&tag_info);

+		if (rc != 0) {	/* the tag was not written: close this descriptor block and retry in a new one */

+			jbd_meta_csum_set(journal->jbd_fs, bhdr);

+			desc_iblock = 0;

+			rc = jbd_block_set(journal->jbd_fs, &desc_block);

+			if (rc != 0)

+				break;

+			goto again;

+		}

+		data_iblock = jbd_journal_alloc_block(journal, trans);

+		rc = jbd_block_get_noread(journal->jbd_fs, &data_block, data_iblock);

+		if (rc != 0) {

+			desc_iblock = 0;

+			ext4_bcache_clear_dirty(desc_block.buf);

+			jbd_block_set(journal->jbd_fs, &desc_block);

+			break;

+		}

+		data = data_block.data;

+		memcpy(data, jbd_buf->block.data,

+			journal->block_size);

+		if (is_escape)

+			((struct jbd_bhdr *)data)->magic = 0;	/* zeroed in the journal copy; the tag's escape flag records it */

+		ext4_bcache_set_dirty(data_block.buf);

+		ext4_bcache_set_flag(data_block.buf, BC_TMP);

+		rc = jbd_block_set(journal->jbd_fs, &data_block);

+		if (rc != 0) {

+			desc_iblock = 0;

+			ext4_bcache_clear_dirty(desc_block.buf);

+			jbd_block_set(journal->jbd_fs, &desc_block);

+			break;

+		}

+		jbd_buf->jbd_lba = data_iblock;	/* where the journalled copy of this buffer lives */

+		tag_ptr += tag_info.tag_bytes;

+		tag_tbl_size -= tag_info.tag_bytes;

+		i++;

+	}

+	if (rc == 0 && desc_iblock) {	/* a descriptor block is still open: finish it */

+		jbd_meta_csum_set(journal->jbd_fs,

+				(struct jbd_bhdr *)bhdr);

+		trans->data_csum = checksum;

+		rc = jbd_block_set(journal->jbd_fs, &desc_block);

+	}

+	return rc;

+}

+/**@brief  Write revoke block for a transaction

+ *         Every revoke record of the transaction is stored as a

+ *         big-endian block address (4 bytes, or 8 with the 64BIT

+ *         feature) after the revoke header. When a block is full it

+ *         is checksummed and released and a new one is started.

+ *         The header's count field is the number of bytes in use,

+ *         header included; space reserved for the checksum tail is

+ *         not counted, because jbd_build_revoke_tree() derives the

+ *         number of entries from count alone.

+ * @param  journal current journal session

+ * @param  trans transaction

+ * @return standard error code*/

+static int

+jbd_journal_prepare_revoke(struct jbd_journal *journal,

+			   struct jbd_trans *trans)

+{

+	int rc = 0;

+	struct ext4_block desc_block = EXT4_BLOCK_ZERO();

+	s32int tag_tbl_size = 0;

+	u32int desc_iblock = 0;

+	u32int used = 0;

+	char *blocks_entry = nil;

+	struct jbd_revoke_rec *rec, *tmp;

+	struct jbd_revoke_header *header = nil;

+	s32int record_len = 4;

+	struct jbd_bhdr *bhdr = nil;

+	if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,

+				     JBD_FEATURE_INCOMPAT_64BIT))

+		record_len = 8;

+	RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,

+			  tmp) {

+again:

+		if (!desc_iblock) {

+			desc_iblock = jbd_journal_alloc_block(journal, trans);

+			rc = jbd_block_get_noread(journal->jbd_fs, &desc_block,

+						  desc_iblock);

+			if (rc != 0)

+				break;

+			bhdr = (struct jbd_bhdr *)desc_block.data;

+			jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);

+			jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);

+			jbd_set32(bhdr, sequence, trans->trans_id);

+			header = (struct jbd_revoke_header *)bhdr;

+			blocks_entry = (char *)(header + 1);

+			/* Bytes in use so far: just the header. */

+			used = sizeof(struct jbd_revoke_header);

+			/* Space left for records; the checksum tail is

+			 * kept free. */

+			tag_tbl_size = journal->block_size -

+				sizeof(struct jbd_revoke_header);

+			if (jbd_has_csum(&journal->jbd_fs->sb))

+				tag_tbl_size -= sizeof(struct jbd_block_tail);

+			if (!trans->start_iblock)

+				trans->start_iblock = desc_iblock;

+			ext4_bcache_set_dirty(desc_block.buf);

+			ext4_bcache_set_flag(desc_block.buf, BC_TMP);

+		}

+		if (tag_tbl_size < record_len) {

+			/* Block full: close it and retry this record in

+			 * a new block. */

+			jbd_set32(header, count, used);

+			jbd_meta_csum_set(journal->jbd_fs, bhdr);

+			bhdr = nil;

+			desc_iblock = 0;

+			header = nil;

+			rc = jbd_block_set(journal->jbd_fs, &desc_block);

+			if (rc != 0)

+				break;

+			goto again;

+		}

+		if (record_len == 8) {

+			u64int *blocks =

+				(u64int *)blocks_entry;

+			*blocks = to_be64(rec->lba);

+		} else {

+			u32int *blocks =

+				(u32int *)blocks_entry;

+			*blocks = to_be32((u32int)rec->lba);

+		}

+		blocks_entry += record_len;

+		tag_tbl_size -= record_len;

+		used += record_len;

+	}

+	if (rc == 0 && desc_iblock) {

+		/* A block is still open: finish it. */

+		if (header != nil)

+			jbd_set32(header, count, used);

+		jbd_meta_csum_set(journal->jbd_fs, bhdr);

+		rc = jbd_block_set(journal->jbd_fs, &desc_block);

+	}

+	return rc;

+}

+/**@brief  Put references of block descriptors in a transaction.

+ *         Each buffer queued on the transaction has its block passed to

+ *         ext4_block_set() on the block device of the owning file system.

+ * @param  journal current journal session

+ * @param  trans transaction*/

+void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)

+{

+	struct ext4_fs *fs;

+	struct jbd_buf *cur, *next;

+	fs = journal->jbd_fs->inode_ref.fs;

+	TAILQ_FOREACH_SAFE(cur, &trans->buf_queue, buf_node, next) {

+		/* ext4_block_set() is given a local copy of the descriptor. */

+		struct ext4_block held = cur->block;

+		ext4_block_set(fs->bdev, &held);

+	}

+}

+/**@brief  Update the start block of the journal when

+ *         all the contents in a transaction reach the disk.

+ * @param  bc  block cache (unused)

+ * @param  buf cache buffer whose write finished; may be nil

+ * @param  res result of the write; a non-zero value is stored in

+ *             trans->error

+ * @param  arg the struct jbd_buf describing the journalled buffer;

+ *             it is unlinked and freed here*/

+static void jbd_trans_end_write(struct ext4_bcache *bc,

+			  struct ext4_buf *buf,

+			  int res,

+			  void *arg)

+{

+	struct jbd_buf *jbd_buf = arg;

+	struct jbd_trans *trans = jbd_buf->trans;

+	struct jbd_block_rec *block_rec = jbd_buf->block_rec;

+	struct jbd_journal *journal = trans->journal;

+	/* Sampled up front, before any queue below is modified. */

+	bool first_in_queue =

+		trans == TAILQ_FIRST(&journal->cp_queue);

+	if (res != 0)

+		trans->error = res;

+	USED(bc);

+	/* Unlink the buffer from the transaction and from its block record. */

+	TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);

+	TAILQ_REMOVE(&block_rec->dirty_buf_queue,

+			jbd_buf,

+			dirty_buf_node);

+	jbd_trans_finish_callback(journal,

+			trans,

+			jbd_buf->block_rec,

+			false,

+			false);

+	if (block_rec->trans == trans && buf) {

+		/* Clear the end_write and end_write_arg fields. */

+		buf->end_write = nil;

+		buf->end_write_arg = nil;

+	}

+	/* jbd_buf is not referenced again after this point. */

+	ext4_free(jbd_buf);

+	trans->written_cnt++;

+	if (trans->written_cnt == trans->data_cnt) {

+		/* If it is the first transaction on checkpoint queue,

+		 * we will shift the start of the journal to the next

+		 * transaction, and remove subsequent written

+		 * transactions from checkpoint queue until we find

+		 * an unwritten one. */

+		if (first_in_queue) {

+			journal->start = trans->start_iblock +

+				trans->alloc_blocks;

+			wrap(&journal->jbd_fs->sb, journal->start);

+			journal->trans_id = trans->trans_id + 1;

+			TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);

+			jbd_journal_free_trans(journal, trans, false);

+			jbd_journal_purge_cp_trans(journal, false, false);

+			/* Record the new start and trans_id in the journal superblock. */

+			jbd_journal_write_sb(journal);

+			jbd_write_sb(journal->jbd_fs);

+		}

+	}

+}

+/**@brief  Commit a transaction to the journal immediately.

+ *         Writes the descriptor/data blocks, the revoke blocks and the

+ *         commit block, then either queues the transaction for

+ *         checkpointing or frees it. A transaction with neither buffers

+ *         nor revoke records is freed and 0 is returned. On failure the

+ *         transaction is freed and journal->last is restored.

+ * @param  journal current journal session

+ * @param  trans transaction

+ * @return standard error code*/

+static int __jbd_journal_commit_trans(struct jbd_journal *journal,

+				      struct jbd_trans *trans)

+{

+	int rc;

+	u32int last = journal->last;	/* restored at Finish if the commit fails */

+	struct jbd_revoke_rec *rec, *tmp;

+	trans->trans_id = journal->alloc_trans_id;

+	rc = jbd_journal_prepare(journal, trans);

+	if (rc != 0)

+		goto Finish;

+	rc = jbd_journal_prepare_revoke(journal, trans);

+	if (rc != 0)

+		goto Finish;

+	if (TAILQ_EMPTY(&trans->buf_queue) &&

+	    RB_EMPTY(&trans->revoke_root)) {

+		/* Since there are no entries in both buffer list

+		 * and revoke entry list, we do not consider trans as

+		 * complete transaction and just return 0.*/

+		jbd_journal_free_trans(journal, trans, false);

+		goto Finish;

+	}

+	rc = jbd_trans_write_commit_block(trans);

+	if (rc != 0)

+		goto Finish;

+	/* The id is consumed only once the commit block has been written. */

+	journal->alloc_trans_id++;

+	/* Complete the checkpoint of buffers which are revoked. */

+	RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,

+			tmp) {

+		struct jbd_block_rec *block_rec =

+			jbd_trans_block_rec_lookup(journal, rec->lba);

+		struct jbd_buf *jbd_buf = nil;

+		if (block_rec)

+			jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,

+					jbd_buf_dirty);

+		if (jbd_buf) {

+			struct ext4_buf *buf;

+			struct ext4_block block = EXT4_BLOCK_ZERO();

+			/*

+			 * We do this to reset the ext4_buf::end_write and

+			 * ext4_buf::end_write_arg fields so that the checkpoint

+			 * callback won't be triggered again.

+			 */

+			buf = ext4_bcache_find_get(journal->jbd_fs->bdev->bc,

+					&block,

+					jbd_buf->block_rec->lba);

+			/* Invoked directly with res 0; buf may be nil here. */

+			jbd_trans_end_write(journal->jbd_fs->bdev->bc, buf, 0, jbd_buf);

+			if (buf)

+				ext4_block_set(journal->jbd_fs->bdev, &block);

+		}

+	}

+	if (TAILQ_EMPTY(&journal->cp_queue)) {

+		/*

+		 * This transaction is going to be the first object in the

+		 * checkpoint queue.

+		 * When the first transaction in checkpoint queue is completely

+		 * written to disk, we shift the tail of the log to right.

+		 */

+		if (trans->data_cnt) {

+			journal->start = trans->start_iblock;

+			wrap(&journal->jbd_fs->sb, journal->start);

+			journal->trans_id = trans->trans_id;

+			jbd_journal_write_sb(journal);

+			jbd_write_sb(journal->jbd_fs);

+			TAILQ_INSERT_TAIL(&journal->cp_queue, trans,

+					trans_node);

+			jbd_journal_cp_trans(journal, trans);

+		} else {

+			/* No data blocks: the start moves past this

+			 * transaction right away and the transaction is freed. */

+			journal->start = trans->start_iblock +

+				trans->alloc_blocks;

+			wrap(&journal->jbd_fs->sb, journal->start);

+			journal->trans_id = trans->trans_id + 1;

+			jbd_journal_write_sb(journal);

+			jbd_journal_free_trans(journal, trans, false);

+		}

+	} else {

+		/* No need to do anything to the JBD superblock. */

+		TAILQ_INSERT_TAIL(&journal->cp_queue, trans,

+				trans_node);

+		if (trans->data_cnt)

+			jbd_journal_cp_trans(journal, trans);

+	}

+Finish:

+	/* Error path: roll journal->last back and free the transaction. */

+	if (rc != 0) {

+		journal->last = last;

+		jbd_journal_free_trans(journal, trans, true);

+	}

+	return rc;

+}

+/**@brief  Allocate a new transaction

+ *         The object is zero-filled, bound to the journal and given an

+ *         empty buffer queue; its trans_id is assigned later, at commit.

+ * @param  journal current journal session

+ * @return transaction allocated, or nil when the allocation fails*/

+struct jbd_trans *

+jbd_journal_new_trans(struct jbd_journal *journal)

+{

+	struct jbd_trans *t = ext4_calloc(1, sizeof(*t));

+	if (t == nil)

+		return nil;

+	/* No trans_id yet: it is handed out when the transaction commits. */

+	t->error = 0;

+	t->data_csum = EXT4_CRC32_INIT;

+	t->journal = journal;

+	TAILQ_INIT(&t->buf_queue);

+	return t;

+}

+/**@brief  Commit a transaction to the journal immediately.

+ *         Public entry point; the work is done by

+ *         __jbd_journal_commit_trans().

+ * @param  journal current journal session

+ * @param  trans transaction

+ * @return standard error code*/

+int jbd_journal_commit_trans(struct jbd_journal *journal,

+			     struct jbd_trans *trans)

+{

+	int rc;

+	rc = __jbd_journal_commit_trans(journal, trans);

+	return rc;

+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_mbr.c

@@ -1,0 +1,165 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_mbr.h"

+#define MBR_SIGNATURE 0xAA55

+#pragma pack on

+/* One 16-byte entry of the MBR partition table (little-endian on disk). */

+struct ext4_part_entry {

+	u8int status;		/* boot indicator */

+	u8int chs1[3];		/* CHS address of the first sector */

+	u8int type;		/* partition type, 0x83 is Linux native */

+	u8int chs2[3];		/* CHS address of the last sector */

+	u32int first_lba;	/* first sector of the partition */

+	u32int sectors;		/* length of the partition in sectors */

+};

+/* The 512-byte master boot record. The disk signature lives at byte 440

+ * and is followed by two reserved bytes, so that the partition table

+ * starts at byte 446 and the 0xAA55 signature at byte 510. */

+struct ext4_mbr {

+	u8int bootstrap[440];

+	u32int disk_id;

+	u16int reserved;

+	struct ext4_part_entry part_entry[4];

+	u16int signature;

+};

+#pragma pack off

+/**@brief  Read the MBR of a block device and collect its Linux partitions.

+ *         Only entries of type 0x83 with a non-zero sector count are

+ *         reported; every other slot of bdevs is left zeroed.

+ * @param  parent block device holding the partition table

+ * @param  bdevs  output, indexed by MBR partition slot (0-3)

+ * @return 0 on success, -1 (errstr Enotfound) when the MBR signature is

+ *         missing, otherwise the error of the failing block-device call*/

+int ext4_mbr_scan(struct ext4_blockdev *parent, struct ext4_mbr_bdevs *bdevs)

+{

+	int r;

+	usize i;

+	ext4_dbg(DEBUG_MBR, DBG_INFO "ext4_mbr_scan\n");

+	memset(bdevs, 0, sizeof(struct ext4_mbr_bdevs));

+	r = ext4_block_init(parent);

+	if (r != 0)

+		return r;

+	r = ext4_block_readbytes(parent, 0, parent->bdif->ph_bbuf, 512);

+	if (r != 0) {

+		goto blockdev_fini;

+	}

+	const struct ext4_mbr *mbr = (void *)parent->bdif->ph_bbuf;

+	if (to_le16(mbr->signature) != MBR_SIGNATURE) {

+		ext4_dbg(DEBUG_MBR, DBG_ERROR "ext4_mbr_scan: unknown "

+			 "signature: 0x%x\n", to_le16(mbr->signature));

+		werrstr(Enotfound);

+		r = -1;

+		goto blockdev_fini;

+	}

+	/*Show bootstrap code*/

+	ext4_dbg(DEBUG_MBR, "mbr_part: bootstrap:");

+	for (i = 0; i < sizeof(mbr->bootstrap); ++i) {

+		if (!(i & 0xF))

+				ext4_dbg(DEBUG_MBR | DEBUG_NOPREFIX, "\n");

+		ext4_dbg(DEBUG_MBR | DEBUG_NOPREFIX, "%02x, ", mbr->bootstrap[i]);

+	}

+	ext4_dbg(DEBUG_MBR | DEBUG_NOPREFIX, "\n\n");

+	for (i = 0; i < 4; ++i) {

+		const struct ext4_part_entry *pe = &mbr->part_entry[i];

+		/* Multi-byte MBR fields are little-endian on disk; convert

+		 * them like the signature above so that big-endian hosts

+		 * see the right values. */

+		u32int first_lba = to_le32(pe->first_lba);

+		u32int sectors = to_le32(pe->sectors);

+		ext4_dbg(DEBUG_MBR, "mbr_part: %d\n", (int)i);

+		ext4_dbg(DEBUG_MBR, "\tstatus: 0x%x\n", pe->status);

+		ext4_dbg(DEBUG_MBR, "\ttype 0x%x:\n", pe->type);

+		ext4_dbg(DEBUG_MBR, "\tfirst_lba: 0x%ux\n", first_lba);

+		ext4_dbg(DEBUG_MBR, "\tsectors: 0x%ux\n", sectors);

+		if (!sectors)

+			continue; /*Empty entry*/

+		if (pe->type != 0x83)

+			continue; /*Unsupported entry. 0x83 - linux native*/

+		/* Offsets and sizes are handed out in bytes. */

+		bdevs->partitions[i].bdif = parent->bdif;

+		bdevs->partitions[i].part_offset =

+			(u64int)first_lba * parent->bdif->ph_bsize;

+		bdevs->partitions[i].part_size =

+			(u64int)sectors * parent->bdif->ph_bsize;

+	}

+	blockdev_fini:

+	ext4_block_fini(parent);

+	return r;

+}

+/**@brief  Write a new MBR that divides the device into up to four

+ *         Linux (type 0x83) partitions aligned to cylinders.

+ * @param  parent  block device to partition

+ * @param  parts   share of the disk, in percent, for each of the four

+ *                 slots; a slot that rounds down to zero cylinders is

+ *                 left empty

+ * @param  disk_id disk identifier stored in the MBR

+ * @return 0 on success, -1 (errstr Einval) when the shares add up to

+ *         more than 100, otherwise the error of the failing

+ *         block-device call*/

+int ext4_mbr_write(struct ext4_blockdev *parent, struct ext4_mbr_parts *parts, u32int disk_id)

+{

+	int r;

+	u64int disk_size;

+	u32int division_sum = parts->division[0] + parts->division[1] +

+				parts->division[2] + parts->division[3];

+	if (division_sum > 100) {

+		werrstr(Einval);

+		return -1;

+	}

+	ext4_dbg(DEBUG_MBR, DBG_INFO "ext4_mbr_write\n");

+	r = ext4_block_init(parent);

+	if (r != 0)

+		return r;

+	disk_size = parent->part_size;

+	/*Calculate CHS*/

+	u32int k = 16;

+	while ((k < 256) && ((disk_size / parent->bdif->ph_bsize / k / 63) > 1024))

+		k *= 2;

+	if (k == 256)

+		--k;

+	/* Everything below is counted in sectors: first_lba and sectors are

+	 * sector-based fields (ext4_mbr_scan multiplies them by ph_bsize),

+	 * and byte counts would not fit in 32 bits on disks above 4GiB. */

+	const u32int cyl_sectors = 63 * k;

+	const u32int cyl_count = disk_size / parent->bdif->ph_bsize / cyl_sectors;

+	struct ext4_mbr *mbr = (void *)parent->bdif->ph_bbuf;

+	memset(mbr, 0, sizeof(struct ext4_mbr));

+	mbr->disk_id = to_le32(disk_id);

+	u32int cyl_it = 0;

+	for (int i = 0; i < 4; ++i) {

+		u32int cyl_part = cyl_count * parts->division[i] / 100;

+		if (!cyl_part)

+			continue;

+		u32int part_start = cyl_it * cyl_sectors;

+		u32int part_sectors = cyl_part * cyl_sectors;

+		if (i == 0) {

+			/* The first partition begins after the first 63

+			 * sectors, which include the MBR itself. */

+			part_start += 63;

+			part_sectors -= 63;

+		}

+		u32int cyl_end = cyl_part + cyl_it - 1;

+		mbr->part_entry[i].status = 0;

+		mbr->part_entry[i].chs1[0] = i ? 0 : 1;

+		mbr->part_entry[i].chs1[1] = ((cyl_it >> 2) & 0xC0) + 1;

+		mbr->part_entry[i].chs1[2] = cyl_it & 0xFF;

+		mbr->part_entry[i].type = 0x83;

+		mbr->part_entry[i].chs2[0] = k - 1;

+		mbr->part_entry[i].chs2[1] = ((cyl_end >> 2) & 0xC0) + 63;

+		mbr->part_entry[i].chs2[2] = cyl_end & 0xFF;

+		/* Stored little-endian, matching what ext4_mbr_scan reads. */

+		mbr->part_entry[i].first_lba = to_le32(part_start);

+		mbr->part_entry[i].sectors = to_le32(part_sectors);

+		cyl_it += cyl_part;

+	}

+	mbr->signature = to_le16(MBR_SIGNATURE);

+	r = ext4_block_writebytes(parent, 0, parent->bdif->ph_bbuf, 512);

+	ext4_block_fini(parent);

+	return r;

+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_mkfs.c

@@ -1,0 +1,805 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_super.h"

+#include "ext4_block_group.h"

+#include "ext4_dir.h"

+#include "ext4_dir_idx.h"

+#include "ext4_fs.h"

+#include "ext4_inode.h"

+#include "ext4_ialloc.h"

+#include "ext4_mkfs.h"

+/* Base-2 logarithm of j rounded down; yields -1 when j is not positive. */

+static inline int log_2(int j)

+{

+	int bits = -1;

+	while (j > 0) {

+		bits++;

+		j >>= 1;

+	}

+	return bits;

+}

+/* Translate an on-disk superblock into mkfs parameters.

+ * Returns 0, or -1 with errstr set when the superblock magic is wrong.

+ * NOTE(review): the label is copied with strncpy() bounded by

+ * sizeof(info->label); whether the result is always NUL-terminated

+ * depends on the two buffer sizes - confirm against the headers. */

+static int sb2info(struct ext4_sblock *sb, struct ext4_mkfs_info *info)

+{

+	if (to_le16(sb->magic) != EXT4_SUPERBLOCK_MAGIC) {

+		werrstr("invalid superblock magic");

+		return -1;

+	}

+	/* Geometry; len needs block_size, so that one goes first. */

+	info->block_size = 1024 << to_le32(sb->log_block_size);

+	info->len = (u64int)info->block_size * ext4_sb_get_blocks_cnt(sb);

+	info->blocks_per_group = to_le32(sb->blocks_per_group);

+	info->bg_desc_reserve_blocks = to_le16(sb->s_reserved_gdt_blocks);

+	info->dsc_size = to_le16(sb->desc_size);

+	/* Inodes. */

+	info->inodes = to_le32(sb->inodes_count);

+	info->inodes_per_group = to_le32(sb->inodes_per_group);

+	info->inode_size = to_le16(sb->inode_size);

+	/* Feature sets. */

+	info->feat_compat = to_le32(sb->features_compatible);

+	info->feat_incompat = to_le32(sb->features_incompatible);

+	info->feat_ro_compat = to_le32(sb->features_read_only);

+	/* Identity. */

+	memcpy(info->uuid, sb->uuid, UUID_SIZE);

+	strncpy(info->label,sb->volume_name,sizeof(info->label));

+	return 0;

+}

+/* Default group size: eight blocks per byte of block size (presumably one

+ * bit of a single bitmap block per block - confirm). */

+static u32int compute_blocks_per_group(struct ext4_mkfs_info *info)

+{

+	u32int per_group = info->block_size * 8;

+	return per_group;

+}

+/* Default inode count: one inode for every four blocks of the volume. */

+static u32int compute_inodes(struct ext4_mkfs_info *info)

+{

+	u32int nblocks = (u32int)EXT4_DIV_ROUND_UP(info->len, info->block_size);

+	return nblocks / 4;

+}

+/* Spread info->inodes over the block groups, rounding the per-group count

+ * up to a whole number of inode-table blocks. info->inodes is rewritten

+ * with the resulting total. Returns the inodes per group. */

+static u32int compute_inodes_per_group(struct ext4_mkfs_info *info)

+{

+	u32int nblocks = (u32int)EXT4_DIV_ROUND_UP(info->len, info->block_size);

+	u32int ngroups = EXT4_DIV_ROUND_UP(nblocks, info->blocks_per_group);

+	u32int per_group = EXT4_DIV_ROUND_UP(info->inodes, ngroups);

+	per_group = EXT4_ALIGN(per_group, (info->block_size / info->inode_size));

+	/* The rounding above changes the total, so publish the new value. */

+	info->inodes = per_group * ngroups;

+	return per_group;

+}

+/* Default journal size: 1/64 of the volume's blocks, kept within

+ * 1024..32768 blocks. */

+static u32int compute_journal_blocks(struct ext4_mkfs_info *info)

+{

+	u32int n = (u32int)EXT4_DIV_ROUND_UP(info->len, info->block_size) / 64;

+	if (n < 1024)

+		return 1024;

+	if (n > 32768)

+		return 32768;

+	return n;

+}

+/* Does block group bgid carry a superblock copy? Without the sparse_super

+ * feature every group does; with it ext4_sb_sparse() decides. */

+static bool has_superblock(struct ext4_mkfs_info *info, u32int bgid)

+{

+	if (info->feat_ro_compat & EXT4_FRO_COM_SPARSE_SUPER)

+		return ext4_sb_sparse(bgid);

+	return true;

+}

+/* Derive the layout figures used while formatting and allocate the

+ * scratch superblock and group-descriptor block.

+ * A trailing block group too small to hold its own metadata is dropped

+ * and the volume is shortened accordingly.

+ * Returns 0, or -1 with errstr Enomem; on failure no allocation is left

+ * behind in aux_info. Release with release_fs_aux_info(). */

+int create_fs_aux_info(struct fs_aux_info *aux_info,

+			      struct ext4_mkfs_info *info)

+{

+	aux_info->first_data_block = (info->block_size > 1024) ? 0 : 1;

+	aux_info->len_blocks = info->len / info->block_size;

+	aux_info->inode_table_blocks = EXT4_DIV_ROUND_UP(info->inodes_per_group *

+			info->inode_size, info->block_size);

+	aux_info->groups = (u32int)EXT4_DIV_ROUND_UP(aux_info->len_blocks -

+			aux_info->first_data_block, info->blocks_per_group);

+	aux_info->blocks_per_ind = info->block_size / sizeof(u32int);

+	aux_info->blocks_per_dind =

+			aux_info->blocks_per_ind * aux_info->blocks_per_ind;

+	/* A triple-indirect block maps ind^3 blocks, i.e. dind * ind. */

+	aux_info->blocks_per_tind =

+			aux_info->blocks_per_dind * aux_info->blocks_per_ind;

+	aux_info->bg_desc_blocks =

+		EXT4_DIV_ROUND_UP(aux_info->groups * info->dsc_size,

+			info->block_size);

+	aux_info->default_i_flags = EXT4_INODE_FLAG_NOATIME;

+	u32int last_group_size = aux_info->len_blocks % info->blocks_per_group;

+	/* Two bitmap blocks plus the inode table ... */

+	u32int last_header_size = 2 + aux_info->inode_table_blocks;

+	/* ... plus superblock and descriptors when the group has a copy. */

+	if (has_superblock(info, aux_info->groups - 1))

+		last_header_size += 1 + aux_info->bg_desc_blocks +

+			info->bg_desc_reserve_blocks;

+	if (last_group_size > 0 && last_group_size < last_header_size) {

+		aux_info->groups--;

+		aux_info->len_blocks -= last_group_size;

+	}

+	aux_info->sb = ext4_calloc(1, EXT4_SUPERBLOCK_SIZE);

+	if (!aux_info->sb) {

+		werrstr(Enomem);

+		return -1;

+	}

+	aux_info->bg_desc_blk = ext4_calloc(1, info->block_size);

+	if (!aux_info->bg_desc_blk) {

+		/* Do not leave a half-built aux_info behind. */

+		ext4_free(aux_info->sb);

+		aux_info->sb = nil;

+		werrstr(Enomem);

+		return -1;

+	}

+	aux_info->xattrs = nil;

+	ext4_dbg(DEBUG_MKFS, DBG_INFO "create_fs_aux_info\n");

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "first_data_block: %ud\n",

+			aux_info->first_data_block);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "len_blocks: %llud\n",

+			aux_info->len_blocks);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "inode_table_blocks: %ud\n",

+			aux_info->inode_table_blocks);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "groups: %ud\n",

+			aux_info->groups);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "bg_desc_blocks: %ud\n",

+			aux_info->bg_desc_blocks);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "default_i_flags: %ud\n",

+			aux_info->default_i_flags);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "blocks_per_ind: %ud\n",

+			aux_info->blocks_per_ind);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "blocks_per_dind: %ud\n",

+			aux_info->blocks_per_dind);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "blocks_per_tind: %ud\n",

+			aux_info->blocks_per_tind);

+	return 0;

+}

+/* Free the buffers allocated by create_fs_aux_info(). The pointers are

+ * reset to nil so that releasing the same aux_info twice is harmless. */

+void release_fs_aux_info(struct fs_aux_info *aux_info)

+{

+	if (aux_info->sb) {

+		ext4_free(aux_info->sb);

+		aux_info->sb = nil;

+	}

+	if (aux_info->bg_desc_blk) {

+		ext4_free(aux_info->bg_desc_blk);

+		aux_info->bg_desc_blk = nil;

+	}

+}

+/* Fill in the superblock memory buffer based on the filesystem parameters.

+ * All multi-byte fields are stored little-endian. The free block count

+ * written here is the whole volume; write_bgroups() replaces it later. */

+static void fill_sb(struct fs_aux_info *aux_info, struct ext4_mkfs_info *info)

+{

+	struct ext4_sblock *sb = aux_info->sb;

+	sb->inodes_count = to_le32(info->inodes_per_group * aux_info->groups);

+	ext4_sb_set_blocks_cnt(sb, aux_info->len_blocks);

+	ext4_sb_set_free_blocks_cnt(sb, aux_info->len_blocks);

+	sb->free_inodes_count = to_le32(info->inodes_per_group * aux_info->groups);

+	sb->reserved_blocks_count_lo = to_le32(0);

+	sb->first_data_block = to_le32(aux_info->first_data_block);

+	sb->log_block_size = to_le32(log_2(info->block_size / 1024));

+	sb->log_cluster_size = to_le32(log_2(info->block_size / 1024));

+	sb->blocks_per_group = to_le32(info->blocks_per_group);

+	sb->frags_per_group = to_le32(info->blocks_per_group);

+	sb->inodes_per_group = to_le32(info->inodes_per_group);

+	sb->mount_time = to_le32(0);

+	sb->write_time = to_le32(0);

+	sb->mount_count = to_le16(0);

+	sb->max_mount_count = to_le16(0xFFFF);

+	sb->magic = to_le16(EXT4_SUPERBLOCK_MAGIC);

+	sb->state = to_le16(EXT4_SUPERBLOCK_STATE_VALID_FS);

+	sb->errors = to_le16(EXT4_SUPERBLOCK_ERRORS_RO);

+	sb->minor_rev_level = to_le16(0);

+	sb->last_check_time = to_le32(0);

+	sb->check_interval = to_le32(0);

+	sb->creator_os = to_le32(EXT4_SUPERBLOCK_OS_LINUX);

+	sb->rev_level = to_le32(1);

+	sb->def_resuid = to_le16(0);

+	sb->def_resgid = to_le16(0);

+	sb->first_inode = to_le32(EXT4_GOOD_OLD_FIRST_INO);

+	sb->inode_size = to_le16(info->inode_size);

+	sb->block_group_index = to_le16(0);

+	sb->features_compatible = to_le32(info->feat_compat);

+	sb->features_incompatible = to_le32(info->feat_incompat);

+	sb->features_read_only = to_le32(info->feat_ro_compat);

+	memcpy(sb->uuid, info->uuid, UUID_SIZE);

+	memset(sb->volume_name, 0, sizeof(sb->volume_name));

+	strncpy(sb->volume_name, info->label, sizeof(sb->volume_name));

+	memset(sb->last_mounted, 0, sizeof(sb->last_mounted));

+	sb->algorithm_usage_bitmap = to_le32(0);

+	sb->s_prealloc_blocks = 0;

+	sb->s_prealloc_dir_blocks = 0;

+	sb->s_reserved_gdt_blocks = to_le16(info->bg_desc_reserve_blocks);

+	if (info->feat_compat & EXT4_FCOM_HAS_JOURNAL)

+		sb->journal_inode_number = to_le32(EXT4_JOURNAL_INO);

+	sb->journal_backup_type = 1;

+	sb->journal_dev = to_le32(0);

+	sb->last_orphan = to_le32(0);

+	sb->hash_seed[0] = to_le32(0x11111111);

+	sb->hash_seed[1] = to_le32(0x22222222);

+	sb->hash_seed[2] = to_le32(0x33333333);

+	sb->hash_seed[3] = to_le32(0x44444444);

+	sb->default_hash_version = EXT2_HTREE_HALF_MD4;

+	sb->checksum_type = 1;

+	sb->desc_size = to_le16(info->dsc_size);

+	sb->default_mount_opts = to_le32(0);

+	sb->first_meta_bg = to_le32(0);

+	sb->mkfs_time = to_le32(0);

+	sb->reserved_blocks_count_hi = to_le32(0);

+	/* Both extra-isize fields are 16 bits wide on disk; a 32-bit

+	 * conversion would leave 0 in them on big-endian hosts. */

+	sb->min_extra_isize = to_le16(sizeof(struct ext4_inode) -

+		EXT4_GOOD_OLD_INODE_SIZE);

+	sb->want_extra_isize = to_le16(sizeof(struct ext4_inode) -

+		EXT4_GOOD_OLD_INODE_SIZE);

+	sb->flags = to_le32(EXT4_SUPERBLOCK_FLAGS_SIGNED_HASH);

+}

+/* Write the descriptor block held in aux_info->bg_desc_blk into every

+ * block group, as descriptor block number blk of that group.

+ * Returns 0 or the error of the failing block-cache call. */

+static int write_bgroup_block(struct ext4_blockdev *bd,

+			      struct fs_aux_info *aux_info,

+			      struct ext4_mkfs_info *info,

+			      u32int blk)

+{

+	int r = 0;

+	u32int j;

+	struct ext4_block b;

+	u32int block_size = ext4_sb_get_block_size(aux_info->sb);

+	for (j = 0; j < aux_info->groups; j++) {

+		u64int bg_start_block = aux_info->first_data_block +

+					  j * info->blocks_per_group;

+		/* blk_off is computed but only ever passed to USED(). */

+		u32int blk_off = 0;

+		blk_off += aux_info->bg_desc_blocks;

+		if (has_superblock(info, j)) {

+			/* Step over the superblock copy of this group. */

+			bg_start_block++;

+			blk_off += info->bg_desc_reserve_blocks;

+			USED(blk_off);

+		}

+		/* NOTE(review): groups without a superblock copy are written

+		 * too, starting at their first block - confirm intended. */

+		u64int dsc_blk = bg_start_block + blk;

+		r = ext4_block_get_noread(bd, &b, dsc_blk);

+		if (r != 0)

+			return r;

+		memcpy(b.data, aux_info->bg_desc_blk, block_size);

+		ext4_bcache_set_dirty(b.buf);

+		r = ext4_block_set(bd, &b);

+		if (r != 0)

+			return r;

+	}

+	return r;

+}

+/* Build the descriptor of every block group, zero its block and inode

+ * bitmap blocks, and write the descriptor blocks out through

+ * write_bgroup_block(). The superblock's free block count is then set

+ * to the sum of the per-group free counts.

+ * Returns 0 or the error of the failing call. */

+static int write_bgroups(struct ext4_blockdev *bd, struct fs_aux_info *aux_info,

+			 struct ext4_mkfs_info *info)

+{

+	int r;

+	struct ext4_block b;

+	struct ext4_bgroup *bg_desc;

+	u32int i;

+	u32int bg_free_blk;

+	u64int sb_free_blk = 0;

+	u32int block_size = ext4_sb_get_block_size(aux_info->sb);

+	u32int dsc_size = ext4_sb_get_desc_size(aux_info->sb);

+	u32int dsc_per_block = block_size / dsc_size;

+	u32int k = 0;	/* index of the next descriptor slot in bg_desc_blk */

+	for (i = 0; i < aux_info->groups; i++) {

+		/* NOTE(review): first_data_block is added twice here, unlike

+		 * in write_bgroup_block() - confirm whether intended. */

+		u64int bg_start_block = aux_info->first_data_block +

+			aux_info->first_data_block + i * info->blocks_per_group;

+		u32int blk_off = 0;

+		bg_desc = (void *)(aux_info->bg_desc_blk + k * dsc_size);

+		/* Free blocks: the group minus inode table and two bitmaps. */

+		bg_free_blk = info->blocks_per_group -

+				aux_info->inode_table_blocks;

+		bg_free_blk -= 2;

+		blk_off += aux_info->bg_desc_blocks;

+		if (i == (aux_info->groups - 1))

+			bg_free_blk -= aux_info->first_data_block;

+		if (has_superblock(info, i)) {

+			/* Superblock copy, descriptors and reserved

+			 * descriptor blocks are not free either. */

+			bg_start_block++;

+			blk_off += info->bg_desc_reserve_blocks;

+			bg_free_blk -= info->bg_desc_reserve_blocks + 1;

+			bg_free_blk -= aux_info->bg_desc_blocks;

+		}

+		/* Block bitmap, inode bitmap and inode table follow each other. */

+		ext4_bg_set_block_bitmap(bg_desc, aux_info->sb,

+					 bg_start_block + blk_off + 1);

+		ext4_bg_set_inode_bitmap(bg_desc, aux_info->sb,

+					 bg_start_block + blk_off + 2);

+		ext4_bg_set_inode_table_first_block(bg_desc,

+						aux_info->sb,

+						bg_start_block + blk_off + 3);

+		ext4_bg_set_free_blocks_count(bg_desc, aux_info->sb,

+					      bg_free_blk);

+		ext4_bg_set_free_inodes_count(bg_desc,

+				aux_info->sb, to_le32(aux_info->sb->inodes_per_group));

+		ext4_bg_set_used_dirs_count(bg_desc, aux_info->sb, 0);

+		ext4_bg_set_flag(bg_desc,

+				 EXT4_BLOCK_GROUP_BLOCK_UNINIT |

+				 EXT4_BLOCK_GROUP_INODE_UNINIT);

+		sb_free_blk += bg_free_blk;

+		/* Zero the block bitmap block. */

+		r = ext4_block_get_noread(bd, &b, bg_start_block + blk_off + 1);

+		if (r != 0)

+			return r;

+		memset(b.data, 0, block_size);

+		ext4_bcache_set_dirty(b.buf);

+		r = ext4_block_set(bd, &b);

+		if (r != 0)

+			return r;

+		/* Zero the inode bitmap block. */

+		r = ext4_block_get_noread(bd, &b, bg_start_block + blk_off + 2);

+		if (r != 0)

+			return r;

+		memset(b.data, 0, block_size);

+		ext4_bcache_set_dirty(b.buf);

+		r = ext4_block_set(bd, &b);

+		if (r != 0)

+			return r;

+		/* Flush the descriptor block once all of its slots are filled. */

+		if (++k != dsc_per_block)

+			continue;

+		k = 0;

+		r = write_bgroup_block(bd, aux_info, info, i / dsc_per_block);

+		if (r != 0)

+			return r;

+	}

+	/* Write the last, possibly partial, descriptor block.

+	 * NOTE(review): this also runs when groups is an exact multiple of

+	 * dsc_per_block (k == 0), writing one more block - confirm. */

+	r = write_bgroup_block(bd, aux_info, info, i / dsc_per_block);

+	if (r != 0)

+		return r;

+	ext4_sb_set_free_blocks_cnt(aux_info->sb, sb_free_blk);

+	return r;

+}

+/* Write the superblock held in aux_info->sb to the device: first the

+ * backup copies at the start of every group that carries one, then the

+ * primary copy at EXT4_SUPERBLOCK_OFFSET. block_group_index is updated

+ * for each copy and is 0 again on return.

+ * Returns 0 or the error of the failing write. */

+int write_sblocks(struct ext4_blockdev *bd, struct fs_aux_info *aux_info,

+			  struct ext4_mkfs_info *info)

+{

+	u64int offset;

+	u32int i;

+	int r;

+	/* write out the backup superblocks */

+	for (i = 1; i < aux_info->groups; i++) {

+		if (has_superblock(info, i)) {

+			/* Widen before multiplying: the byte offset of a

+			 * backup passes 4GiB on larger volumes. */

+			offset = (u64int)info->block_size *

+				(aux_info->first_data_block

+				+ i * info->blocks_per_group);

+			aux_info->sb->block_group_index = to_le16(i);

+			r = ext4_block_writebytes(bd, offset, aux_info->sb,

+						  EXT4_SUPERBLOCK_SIZE);

+			if (r != 0)

+				return r;

+		}

+	}

+	/* write out the primary superblock */

+	aux_info->sb->block_group_index = to_le16(0);

+	return ext4_block_writebytes(bd, EXT4_SUPERBLOCK_OFFSET, aux_info->sb,

+			EXT4_SUPERBLOCK_SIZE);

+}

+/* Read the superblock of an existing file system on bd and describe it

+ * in info. The block device is initialised for the duration of the call

+ * and finalised before returning.

+ * Returns 0 on success; -1 with errstr Enomem when the superblock

+ * buffer cannot be allocated; otherwise the error of the failing call. */

+int ext4_mkfs_read_info(struct ext4_blockdev *bd, struct ext4_mkfs_info *info)

+{

+	int r;

+	struct ext4_sblock *sb;

+	r = ext4_block_init(bd);

+	if (r != 0)

+		return r;

+	sb = ext4_malloc(EXT4_SUPERBLOCK_SIZE);

+	if (!sb) {

+		/* r still holds 0 from ext4_block_init(); report the failure. */

+		werrstr(Enomem);

+		r = -1;

+		goto Finish;

+	}

+	r = ext4_sb_read(bd, sb);

+	if (r != 0)

+		goto Finish;

+	r = sb2info(sb, info);

+Finish:

+	if (sb)

+		ext4_free(sb);

+	ext4_block_fini(bd);

+	return r;

+}

+/* Lay down the static metadata of a new file system: group descriptors

+ * and bitmaps first, then all superblock copies. The auxiliary buffers

+ * are released whatever the outcome. Returns 0 or the first error. */

+static int mkfs_init(struct ext4_blockdev *bd, struct ext4_mkfs_info *info)

+{

+	struct fs_aux_info aux;

+	int rc;

+	memset(&aux, 0, sizeof(aux));

+	rc = create_fs_aux_info(&aux, info);

+	if (rc == 0) {

+		fill_sb(&aux, info);

+		rc = write_bgroups(bd, &aux, info);

+	}

+	if (rc == 0)

+		rc = write_sblocks(bd, &aux, info);

+	release_fs_aux_info(&aux);

+	return rc;

+}

+/* Take and immediately drop a reference on every block group of fs.

+ * Stops at, and returns, the first error; 0 otherwise. */

+static int init_bgs(struct ext4_fs *fs)

+{

+	struct ext4_block_group_ref bg;

+	u32int ngroups = ext4_block_group_cnt(&fs->sb);

+	u32int g;

+	int rc = 0;

+	for (g = 0; g < ngroups; g++) {

+		rc = ext4_fs_get_block_group_ref(fs, g, &bg);

+		if (rc == 0)

+			rc = ext4_fs_put_block_group_ref(&bg);

+		if (rc != 0)

+			break;

+	}

+	return rc;

+}

+/* Allocate the first eleven inodes of a fresh file system, one call per

+ * number 1..11. The calls made for EXT4_ROOT_INO and

+ * EXT4_GOOD_OLD_FIRST_INO ask for a directory, all others for a regular

+ * file; every inode gets mode 0 and those for EXT4_ROOT_INO and

+ * EXT4_JOURNAL_INO also have their block map initialised.

+ * Assumes the n-th allocation yields inode number n - TODO confirm.

+ * Returns 0 or the error of the failing allocation. */

+static int alloc_inodes(struct ext4_fs *fs)

+{

+	struct ext4_inode_ref ref;

+	int ino;

+	int rc = 0;

+	for (ino = 1; ino < 12; ino++) {

+		int type;

+		if (ino == EXT4_ROOT_INO || ino == EXT4_GOOD_OLD_FIRST_INO)

+			type = EXT4_DE_DIR;

+		else

+			type = EXT4_DE_REG_FILE;

+		rc = ext4_fs_alloc_inode(fs, &ref, type);

+		if (rc != 0)

+			return rc;

+		ext4_inode_set_mode(&fs->sb, ref.inode, 0);

+		if (ino == EXT4_ROOT_INO || ino == EXT4_JOURNAL_INO)

+			ext4_fs_inode_blocks_init(fs, &ref);

+		ext4_fs_put_inode_ref(&ref);

+	}

+	return rc;

+}

+/* Create the root directory and its lost+found child on a fresh file

+ * system. Both inodes become directories with mode 0777; they get

+ * either an indexed layout (when the dir_index feature is set) or plain

+ * "." and ".." entries. Every inode reference taken here is released on

+ * all paths, including errors.

+ * Returns 0 or the error of the first failing call. */

+static int create_dirs(struct ext4_fs *fs)

+{

+	int r;

+	struct ext4_inode_ref root;

+	struct ext4_inode_ref child;

+	r = ext4_fs_get_inode_ref(fs, EXT4_ROOT_INO, &root);

+	if (r != 0)

+		return r;

+	r = ext4_fs_get_inode_ref(fs, EXT4_GOOD_OLD_FIRST_INO, &child);

+	if (r != 0)

+		goto put_root;

+	ext4_inode_set_mode(&fs->sb, child.inode,

+			EXT4_INODE_MODE_DIRECTORY | 0777);

+	ext4_inode_set_mode(&fs->sb, root.inode,

+			EXT4_INODE_MODE_DIRECTORY | 0777);

+	/* Initialize directory index if supported */

+	if (ext4_sb_feature_com(&fs->sb, EXT4_FCOM_DIR_INDEX)) {

+		r = ext4_dir_dx_init(&root, &root);

+		if (r != 0)

+			goto put_child;

+		r = ext4_dir_dx_init(&child, &root);

+		if (r != 0)

+			goto put_child;

+		ext4_inode_set_flag(root.inode,	EXT4_INODE_FLAG_INDEX);

+		ext4_inode_set_flag(child.inode, EXT4_INODE_FLAG_INDEX);

+	} else {

+		r = ext4_dir_add_entry(&root, ".", strlen("."), &root);

+		if (r != 0)

+			goto put_child;

+		r = ext4_dir_add_entry(&root, "..", strlen(".."), &root);

+		if (r != 0)

+			goto put_child;

+		r = ext4_dir_add_entry(&child, ".", strlen("."), &child);

+		if (r != 0)

+			goto put_child;

+		r = ext4_dir_add_entry(&child, "..", strlen(".."), &root);

+		if (r != 0)

+			goto put_child;

+	}

+	r = ext4_dir_add_entry(&root, "lost+found", strlen("lost+found"), &child);

+	if (r != 0)

+		goto put_child;

+	ext4_inode_set_links_cnt(root.inode, 3);

+	ext4_inode_set_links_cnt(child.inode, 2);

+	child.dirty = true;

+	root.dirty = true;

+put_child:

+	ext4_fs_put_inode_ref(&child);

+put_root:

+	ext4_fs_put_inode_ref(&root);

+	return r;

+}

+/* Populate the journal inode (EXT4_JOURNAL_INO) when info->journal is

+ * set: make it a regular file with mode 0600, append data blocks, write

+ * a version 2 journal superblock into its first block and copy the

+ * inode's block map into fs->sb.journal_blocks.

+ * Returns 0 (also when no journal was requested) or the error of the

+ * failing call. */

+static int create_journal_inode(struct ext4_fs *fs,

+				struct ext4_mkfs_info *info)

+{

+	int ret;

+	struct ext4_inode_ref inode_ref;

+	u64int blocks_count;

+	if (!info->journal)

+		return 0;

+	ret = ext4_fs_get_inode_ref(fs, EXT4_JOURNAL_INO, &inode_ref);

+	if (ret != 0)

+		return ret;

+	struct ext4_inode *inode = inode_ref.inode;

+	ext4_inode_set_mode(&fs->sb, inode, EXT4_INODE_MODE_FILE | 0600);

+	ext4_inode_set_links_cnt(inode, 1);

+	/* NOTE(review): assumes ext4_inode_get_blocks_count() counts in the

+	 * same unit as info->journal_blocks - confirm. */

+	blocks_count = ext4_inode_get_blocks_count(&fs->sb, inode);

+	while (blocks_count++ < info->journal_blocks)

+	{

+		ext4_fsblk_t fblock;

+		ext4_lblk_t iblock;

+		struct ext4_block blk;

+		ret = ext4_fs_append_inode_dblk(&inode_ref, &fblock, &iblock);

+		if (ret != 0)

+			goto Finish;

+		/* Only logical block 0 gets content: the journal superblock. */

+		if (iblock != 0)

+			continue;

+		ret = ext4_block_get(fs->bdev, &blk, fblock);

+		if (ret != 0)

+			goto Finish;

+		struct jbd_sb * jbd_sb = (struct jbd_sb * )blk.data;

+		memset(jbd_sb, 0, sizeof(struct jbd_sb));

+		/* Journal superblock fields are stored big-endian. */

+		jbd_sb->header.magic = to_be32(JBD_MAGIC_NUMBER);

+		jbd_sb->header.blocktype = to_be32(JBD_SUPERBLOCK_V2);

+		jbd_sb->blocksize = to_be32(info->block_size);

+		jbd_sb->maxlen = to_be32(info->journal_blocks);

+		jbd_sb->nr_users = to_be32(1);

+		jbd_sb->first = to_be32(1);

+		jbd_sb->sequence = to_be32(1);

+		ext4_bcache_set_dirty(blk.buf);

+		ret = ext4_block_set(fs->bdev, &blk);

+		if (ret != 0)

+			goto Finish;

+	}

+	/* Copy the inode's block map into the superblock. */

+	memcpy(fs->sb.journal_blocks, inode->blocks, sizeof(inode->blocks));

+	Finish:

+	ext4_fs_put_inode_ref(&inode_ref);

+	return ret;

+}

+/* Create an ext2/ext3/ext4 file system on a block device.

+ *   fs      file system object used while populating the new volume

+ *   bd      block device to format

+ *   info    creation parameters; len, block_size, journal_blocks,

+ *           blocks_per_group, inodes, inode_size and dsc_size get a

+ *           default when 0, and the struct is updated in place

+ *   fs_type 2, 3 or 4 selects the supported feature set; any other

+ *           value leaves the feature fields of info as passed in

+ * Returns 0 on success, otherwise the first error encountered. */

+int ext4_mkfs(struct ext4_fs *fs, struct ext4_blockdev *bd,

+	      struct ext4_mkfs_info *info, int fs_type)

+{

+	int r;

+	r = ext4_block_init(bd);

+	if (r != 0)

+		return r;

+	bd->fs = fs;

+	if (info->len == 0)

+		info->len = bd->part_size;

+	if (info->block_size == 0)

+		info->block_size = 4096; /*Set block size to default value*/

+	/* Round down the filesystem length to be a multiple of the block size */

+	/* (the mask only works for a power-of-two block size) */

+	info->len &= ~((u64int)info->block_size - 1);

+	if (info->journal_blocks == 0)

+		info->journal_blocks = compute_journal_blocks(info);

+	if (info->blocks_per_group == 0)

+		info->blocks_per_group = compute_blocks_per_group(info);

+	if (info->inodes == 0)

+		info->inodes = compute_inodes(info);

+	if (info->inode_size == 0)

+		info->inode_size = 256;

+	/* Always recomputed; this also rewrites info->inodes. */

+	info->inodes_per_group = compute_inodes_per_group(info);

+	switch (fs_type) {

+	case 2:

+		info->feat_compat = EXT2_SUPPORTED_FCOM;

+		info->feat_ro_compat = EXT2_SUPPORTED_FRO_COM;

+		info->feat_incompat = EXT2_SUPPORTED_FINCOM;

+		break;

+	case 3:

+		info->feat_compat = EXT3_SUPPORTED_FCOM;

+		info->feat_ro_compat = EXT3_SUPPORTED_FRO_COM;

+		info->feat_incompat = EXT3_SUPPORTED_FINCOM;

+		break;

+	case 4:

+		info->feat_compat = EXT4_SUPPORTED_FCOM;

+		info->feat_ro_compat = EXT4_SUPPORTED_FRO_COM;

+		info->feat_incompat = EXT4_SUPPORTED_FINCOM;

+		break;

+	}

+	/* TODO: handle these features some day; until then they are masked out. */

+	info->feat_incompat &= ~EXT4_FINCOM_META_BG;

+	info->feat_incompat &= ~EXT4_FINCOM_FLEX_BG;

+	info->feat_incompat &= ~EXT4_FINCOM_64BIT;

+	info->feat_ro_compat &= ~EXT4_FRO_COM_METADATA_CSUM;

+	info->feat_ro_compat &= ~EXT4_FRO_COM_GDT_CSUM;

+	info->feat_ro_compat &= ~EXT4_FRO_COM_DIR_NLINK;

+	info->feat_ro_compat &= ~EXT4_FRO_COM_EXTRA_ISIZE;

+	info->feat_ro_compat &= ~EXT4_FRO_COM_HUGE_FILE;

+	if (info->journal)

+		info->feat_compat |= EXT4_FCOM_HAS_JOURNAL;

+	if (info->dsc_size == 0) {

+		/* 64BIT was cleared just above, so only the else branch

+		 * can be taken at present. */

+		if (info->feat_incompat & EXT4_FINCOM_64BIT)

+			info->dsc_size = EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE;

+		else

+			info->dsc_size = EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE;

+	}

+	info->bg_desc_reserve_blocks = 0;

+	ext4_dbg(DEBUG_MKFS, DBG_INFO "Creating filesystem with parameters:\n");

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Size: %llud\n", info->len);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Block size: %ud\n",

+			info->block_size);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Blocks per group: %ud\n",

+			info->blocks_per_group);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Inodes per group: %ud\n",

+			info->inodes_per_group);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Inode size: %ud\n",

+			info->inode_size);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Inodes: %ud\n", info->inodes);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Journal blocks: %ud\n",

+			info->journal_blocks);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Features ro_compat: 0x%x\n",

+			info->feat_ro_compat);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Features compat: 0x%x\n",

+			info->feat_compat);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Features incompat: 0x%x\n",

+			info->feat_incompat);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "BG desc reserve: %ud\n",

+			info->bg_desc_reserve_blocks);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Descriptor size: %uhd\n",

+			info->dsc_size);

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "journal: %s\n",

+			info->journal ? "yes" : "no");

+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Label: %s\n", info->label);

+	/* The block cache lives on this stack frame for the whole format. */

+	struct ext4_bcache bc;

+	memset(&bc, 0, sizeof(struct ext4_bcache));

+	ext4_block_set_lb_size(bd, info->block_size);

+	r = ext4_bcache_init_dynamic(&bc, CONFIG_BLOCK_DEV_CACHE_SIZE,

+				      info->block_size);

+	if (r != 0)

+		goto block_fini;

+	/*Bind block cache to block device*/

+	r = ext4_block_bind_bcache(bd, &bc);

+	if (r != 0)

+		goto cache_fini;

+	r = ext4_block_cache_write_back(bd, 1);

+	if (r != 0)

+		goto cache_fini;

+	/* Static metadata first, then the structures that need a live fs. */

+	r = mkfs_init(bd, info);

+	if (r != 0)

+		goto cache_fini;

+	r = ext4_fs_init(fs, bd, false);

+	if (r != 0)

+		goto cache_fini;

+	r = init_bgs(fs);

+	if (r != 0)

+		goto fs_fini;

+	r = alloc_inodes(fs);

+	if (r != 0)

+		goto fs_fini;

+	r = create_dirs(fs);

+	if (r != 0)

+		goto fs_fini;

+	r = create_journal_inode(fs, info);

+	if (r != 0)

+		goto fs_fini;

+	/* Teardown in reverse order of setup; the success path falls

+	 * through all three labels as well. The results of the calls

+	 * below do not change r. */

+	fs_fini:

+	ext4_fs_fini(fs);

+	cache_fini:

+	ext4_block_cache_write_back(bd, 0);

+	ext4_bcache_fini_dynamic(&bc);

+	block_fini:

+	ext4_block_fini(bd);

+	return r;

+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_super.c

@@ -1,0 +1,221 @@

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_debug.h"

+#include "ext4_super.h"

+#include "ext4_crc32.h"

+/*
+ * Number of block groups on the file system: the total block count
+ * divided by the blocks-per-group value, rounded up so that a trailing
+ * partial group is counted too.
+ */
+u32int ext4_block_group_cnt(struct ext4_sblock *s)
+{
+	u64int total = ext4_sb_get_blocks_cnt(s);
+	u32int per_group = ext4_get32(s, blocks_per_group);
+	u32int groups = (u32int)(total / per_group);
+	return (total % per_group) ? groups + 1 : groups;
+}

+/*
+ * Number of blocks in block group bgid.  Every group but the last holds
+ * blocks_per_group blocks; the last one holds whatever remains.
+ */
+u32int ext4_blocks_in_group_cnt(struct ext4_sblock *s, u32int bgid)
+{
+	u32int last = ext4_block_group_cnt(s) - 1;
+	u32int per_group = ext4_get32(s, blocks_per_group);
+	u64int total = ext4_sb_get_blocks_cnt(s);
+	if (bgid >= last)
+		return (u32int)(total - (last * per_group));
+	return per_group;
+}

+/*
+ * Number of inodes in block group bgid.  Every group but the last holds
+ * inodes_per_group inodes; the last one holds whatever remains.
+ */
+u32int ext4_inodes_in_group_cnt(struct ext4_sblock *s, u32int bgid)
+{
+	u32int last = ext4_block_group_cnt(s) - 1;
+	u32int per_group = ext4_get32(s, inodes_per_group);
+	u32int total = ext4_get32(s, inodes_count);
+	if (bgid >= last)
+		return (total - (last * per_group));
+	return per_group;
+}

+/*
+ * CRC32C of the superblock, covering every byte that precedes the
+ * checksum field itself.
+ */
+static u32int ext4_sb_csum(struct ext4_sblock *s)
+{
+	u32int crc;
+	crc = ext4_crc32c(EXT4_CRC32_INIT, s,
+			offsetof(struct ext4_sblock, checksum));
+	return crc;
+}

+/*
+ * Check the stored superblock checksum.  Without the METADATA_CSUM
+ * feature there is nothing to verify and the superblock is accepted;
+ * with it, the checksum type must be CRC32C and the value must match.
+ */
+static bool ext4_sb_verify_csum(struct ext4_sblock *s)
+{
+	bool ok = true;
+	if (ext4_sb_feature_ro_com(s, EXT4_FRO_COM_METADATA_CSUM)) {
+		ok = s->checksum_type == to_le32(EXT4_CHECKSUM_CRC32C) &&
+		     s->checksum == to_le32(ext4_sb_csum(s));
+	}
+	return ok;
+}

+/*
+ * Recompute and store the superblock checksum; a no-op unless the
+ * METADATA_CSUM feature is enabled.
+ */
+void ext4_sb_set_csum(struct ext4_sblock *s)
+{
+	if (ext4_sb_feature_ro_com(s, EXT4_FRO_COM_METADATA_CSUM))
+		s->checksum = to_le32(ext4_sb_csum(s));
+}

+/*
+ * Refresh the checksum and write the superblock to its fixed offset on
+ * the block device.  Returns the result of the byte-level write.
+ */
+int ext4_sb_write(struct ext4_blockdev *bdev, struct ext4_sblock *s)
+{
+	int rc;
+	ext4_sb_set_csum(s);
+	rc = ext4_block_writebytes(bdev, EXT4_SUPERBLOCK_OFFSET, s,
+				   EXT4_SUPERBLOCK_SIZE);
+	return rc;
+}

+/*
+ * Read the superblock from its fixed offset on the block device into s.
+ * Returns the result of the byte-level read; no validation is done here.
+ */
+int ext4_sb_read(struct ext4_blockdev *bdev, struct ext4_sblock *s)
+{
+	int rc;
+	rc = ext4_block_readbytes(bdev, EXT4_SUPERBLOCK_OFFSET, s,
+				  EXT4_SUPERBLOCK_SIZE);
+	return rc;
+}

+/*
+ * Sanity-check a superblock: magic number, non-zero geometry counters,
+ * minimum inode size and first inode number, group descriptor size
+ * within bounds, and finally the checksum.
+ */
+bool ext4_sb_check(struct ext4_sblock *s)
+{
+	if (ext4_get16(s, magic) != EXT4_SUPERBLOCK_MAGIC)
+		return false;
+	/* none of the basic geometry counters may be zero */
+	if (ext4_get32(s, inodes_count) == 0 ||
+	    ext4_sb_get_blocks_cnt(s) == 0 ||
+	    ext4_get32(s, blocks_per_group) == 0 ||
+	    ext4_get32(s, inodes_per_group) == 0)
+		return false;
+	if (ext4_get16(s, inode_size) < 128 ||
+	    ext4_get32(s, first_inode) < 11)
+		return false;
+	if (ext4_sb_get_desc_size(s) < EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE ||
+	    ext4_sb_get_desc_size(s) > EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return false;
+	return ext4_sb_verify_csum(s) ? true : false;
+}

+/*
+ * Return 1 if a equals b raised to some positive integer power
+ * (b, b*b, ...), 0 otherwise.  Callers in this file pass b = 3, 5 or 7.
+ */
+static inline int is_power_of(u32int a, u32int b)
+{
+	/* strip factors of b while a is still larger than b */
+	for (; a > b; a /= b) {
+		if ((a % b) != 0)
+			return 0;
+	}
+	return a == b;
+}

+/*
+ * Sparse-superblock rule: groups 0 and 1 qualify, even groups never do,
+ * and odd groups qualify when they are a power of 7, 5 or 3.
+ */
+bool ext4_sb_sparse(u32int group)
+{
+	if (group <= 1)
+		return 1;
+	if ((group & 1) == 0)
+		return 0;
+	if (is_power_of(group, 7))
+		return 1;
+	if (is_power_of(group, 5))
+		return 1;
+	return is_power_of(group, 3) != 0;
+}

+/*
+ * Does block group `group` carry a superblock copy?  Without the
+ * SPARSE_SUPER feature every group does; with it only the groups
+ * selected by ext4_sb_sparse() do.
+ */
+bool ext4_sb_is_super_in_bg(struct ext4_sblock *s, u32int group)
+{
+	if (!ext4_sb_feature_ro_com(s, EXT4_FRO_COM_SPARSE_SUPER))
+		return true;
+	return ext4_sb_sparse(group) ? true : false;
+}

+/*
+ * META_BG layout: within each meta group (as many groups as descriptors
+ * fit in one block) only the first, second and last group hold a
+ * descriptor block.  Returns 1 for those groups, 0 otherwise.
+ */
+static u32int ext4_bg_num_gdb_meta(struct ext4_sblock *s, u32int group)
+{
+	u32int per_block =
+	    ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+	u32int head = (group / per_block) * per_block;
+	u32int tail = head + per_block - 1;
+	return (group == head || group == head + 1 || group == tail) ? 1 : 0;
+}

+/*
+ * Classic (non-META_BG) layout: a group without a superblock copy has no
+ * descriptor blocks.  Otherwise it has first_meta_bg blocks when META_BG
+ * is enabled, or enough blocks to describe every group when it is not.
+ */
+static u32int ext4_bg_num_gdb_nometa(struct ext4_sblock *s, u32int group)
+{
+	u32int per_block;
+	u32int blocks;
+	if (!ext4_sb_is_super_in_bg(s, group))
+		return 0;
+	per_block = ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+	/* descriptor blocks needed for all groups, rounded up */
+	blocks = (ext4_block_group_cnt(s) + per_block - 1) / per_block;
+	return ext4_sb_feature_incom(s, EXT4_FINCOM_META_BG) ?
+	    ext4_sb_first_meta_bg(s) : blocks;
+}

+/*
+ * Number of group descriptor blocks stored in block group `group`.
+ * Groups whose meta group lies at or beyond first_meta_bg on a META_BG
+ * file system use the META_BG layout; all others use the classic one.
+ */
+u32int ext4_bg_num_gdb(struct ext4_sblock *s, u32int group)
+{
+	u32int per_block =
+	    ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+	u32int first_meta_bg = ext4_sb_first_meta_bg(s);
+	u32int metagroup = group / per_block;
+	if (ext4_sb_feature_incom(s,EXT4_FINCOM_META_BG) &&
+	    metagroup >= first_meta_bg)
+		return ext4_bg_num_gdb_meta(s, group);
+	return ext4_bg_num_gdb_nometa(s, group);
+}

+/*
+ * Count the base metadata at the start of block group block_group: the
+ * superblock copy (if the group has one), its group descriptor blocks
+ * and, outside of the META_BG area, the reserved GDT blocks.  The block
+ * count is then converted with the cluster size fields.
+ *
+ * NOTE(review): the final shift is by log_cluster_size itself rather
+ * than by a cluster-to-block ratio; that looks suspicious for block
+ * sizes above 1K and should be confirmed against the reference
+ * implementation.  Behaviour is left unchanged here.
+ */
+u32int ext4_num_base_meta_clusters(struct ext4_sblock *s,
+				     u32int block_group)
+{
+	u32int dsc_per_block =
+	    ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+	u32int num = ext4_sb_is_super_in_bg(s, block_group);
+	u32int clustersize;
+	u32int cluster_ratio;
+	if (ext4_sb_feature_incom(s, EXT4_FINCOM_META_BG) &&
+	    block_group >= ext4_sb_first_meta_bg(s) * dsc_per_block) {
+		/* META_BG area: descriptor blocks only */
+		num += ext4_bg_num_gdb(s, block_group);
+	} else if (num) {
+		/* classic area: only groups with a superblock copy */
+		num += ext4_bg_num_gdb(s, block_group);
+		num += ext4_get16(s, s_reserved_gdt_blocks);
+	}
+	clustersize = 1024 << ext4_get32(s, log_cluster_size);
+	cluster_ratio = clustersize / ext4_sb_get_block_size(s);
+	return (num + cluster_ratio - 1) >> ext4_get32(s, log_cluster_size);
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4_trans.c

@@ -1,0 +1,61 @@

+#include "ext4_config.h"

+#include "ext4.h"

+#include "ext4_fs.h"

+#include "ext4_journal.h"

+/*
+ * Mark a cached buffer dirty.  When a journal is present and a
+ * transaction is open the block is handed to the transaction; otherwise
+ * the buffer is flagged dirty directly in the block cache.
+ */
+int ext4_trans_set_block_dirty(struct ext4_buf *buf)
+{
+	struct ext4_fs *fs = buf->bc->bdev->fs;
+	if (!fs->jbd_journal || !fs->curr_trans) {
+		ext4_bcache_set_dirty(buf);
+		return 0;
+	}
+	/* journalled path: wrap the buffer in a block descriptor */
+	struct ext4_block block = {
+		.lb_id = buf->lba,
+		.data = buf->data,
+		.buf = buf
+	};
+	struct jbd_trans *trans = fs->curr_trans;
+	return jbd_trans_set_block_dirty(trans, &block);
+}

+/*
+ * Transaction-layer wrapper around ext4_block_get_noread(); it simply
+ * forwards the call and its result.
+ */
+int ext4_trans_block_get_noread(struct ext4_blockdev *bdev,
+			  struct ext4_block *b,
+			  u64int lba)
+{
+	return ext4_block_get_noread(bdev, b, lba);
+}

+/*
+ * Transaction-layer wrapper around ext4_block_get(); it simply forwards
+ * the call and its result.
+ */
+int ext4_trans_block_get(struct ext4_blockdev *bdev,
+		   struct ext4_block *b,
+		   u64int lba)
+{
+	return ext4_block_get(bdev, b, lba);
+}

+/*
+ * Revoke block lba.  Without a journal this does nothing and returns 0.
+ * With a journal, the block is revoked in the open transaction if there
+ * is one, and flushed to the device otherwise.
+ */
+int ext4_trans_try_revoke_block(struct ext4_blockdev *bdev, u64int lba)
+{
+	struct ext4_fs *fs = bdev->fs;
+	if (!fs->jbd_journal)
+		return 0;
+	if (fs->curr_trans) {
+		struct jbd_trans *trans = fs->curr_trans;
+		return jbd_trans_try_revoke_block(trans, lba);
+	}
+	return ext4_block_flush_lba(fs->bdev, lba);
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/ext4srv.c

@@ -1,0 +1,1050 @@

+#include "ext4_config.h"

+#include "ext4.h"

+#include <fcall.h>

+#include <thread.h>

+#include <9p.h>

+#include <bio.h>

+#include "ext4_inode.h"

+#include "group.h"

+#include "common.h"

+#define MIN(a,b) ((a)<(b)?(a):(b))

+int mainstacksize = 65536;

+/* Per-fid state, stored in Fid.aux by rattach and rclone. */
+typedef struct Aux Aux;
+struct Aux {
+	Part *p;	/* partition the fid lives on (from openpart) */
+	u32int uid;	/* numeric id of the attached user, or Root */
+	char *path;	/* malloc'ed path relative to the partition root; "" is the root */
+	int doff;	/* dirgen: number of visible entries consumed since the last rewind */
+	/* open handle; nil until the fid is opened or created */
+	union {
+		ext4_file *file;
+		ext4_dir *dir;
+	};
+	int type;	/* Adir or Afile: selects the valid union member */
+};
+/* values of Aux.type */
+enum {
+	Adir,
+	Afile,
+};

+/*
+ * Run-time options; the defaults below are overridden by the command
+ * line flags parsed in threadmain and handed to openpart.
+ */
+static Opts opts = {
+	.group = nil,		/* -g: contents of the group file */
+	.cachewb = 0,		/* -C */
+	.asroot = 0,		/* -R: treat every attach as Root */
+	.rdonly = 0,		/* -r */
+	.linkmode = Lhide,	/* -l switches to Lresolve */
+	.fstype = -1,		/* -1 undecided, 0 serve, 2..4 mkfs (-M) */
+	.blksz = 1024,		/* -b: mkfs block size */
+	.label = "",		/* -L: mkfs label */
+	.inodesz = 256,		/* -I: mkfs inode size */
+	.ninode = 0,		/* -N: mkfs inode count */
+};
+/* uid that bypasses the owner and group checks; set with -R */
+static u32int Root;
+/* block of zero bytes rwrite uses to pad a file up to the write offset */
+static u8int zero[65536];
+/* name posted in /srv; the command file is named <srvname>.cmd */
+static char *srvname = "ext4";

+/*
+ * Resolve the symbolic link named by s, a malloc'ed "mountpoint/path"
+ * string, when link mode is Lresolve.
+ *
+ * Ownership: s is consumed.  The result is either s itself (not a link,
+ * or resolving is disabled) or a newly allocated path of the link
+ * target; the caller frees it.  When nil is returned s has already
+ * been freed and errstr is set.
+ *
+ * If value is not nil, *value receives a strdup'ed copy of the raw link
+ * text, or nil when nothing was resolved.
+ */
+static char *
+linkresolve(Aux *a, char *s, char **value)
+{
+	char *q, buf[4096+1];
+	usize sz;
+	int res;
+	if(value != nil)
+		*value = nil;
+	/* callers pass smprint results straight in, so s may be nil */
+	if(s == nil){
+		werrstr("memory");
+		return nil;
+	}
+	if(opts.linkmode != Lresolve)
+		return s;
+	if((res = ext4_readlink(s, buf, sizeof(buf), &sz)) != 0){
+		/* not a link (or unreadable): keep the path as it is */
+		werrstr("readlink: %s: %r", s);
+		return s;
+	}
+	if(sz >= sizeof(buf)){
+		werrstr("readlink: %s: path too long", s);
+		free(s);
+		return nil;
+	}
+	buf[sz] = 0;
+	if(value != nil)
+		*value = strdup(buf);
+	cleanname(buf);
+	if(buf[0] == '/'){
+		/* absolute target: relative to the partition root */
+		free(s);
+		if((s = smprint("%M%s", a->p, buf)) == nil)
+			werrstr("memory");
+		return s;
+	}
+	/* relative target: replace the last element of s with it */
+	if((q = strrchr(s, '/')) == nil){
+		werrstr("readlink: %s: botched path", s);
+		free(s);
+		return nil;
+	}
+	*q = 0;
+	q = s;
+	s = smprint("%s/%s", q, buf);
+	free(q);
+	if(s == nil){
+		werrstr("memory");
+		return nil;
+	}
+	/* clean only the part after the mount point prefix */
+	if((q = strchr(s+1, '/')) != nil)
+		cleanname(q);
+	return s;
+}

+/*
+ * Build the full path ("mountpoint/path") of the file a refers to and
+ * run it through linkresolve.  The caller frees the result.
+ */
+static char *
+fullpath(Aux *a)
+{
+	char *s;
+	s = smprint("%M/%s", a->p, a->path);
+	return linkresolve(a, s, nil);
+}

+/*
+ * Check whether the user attached through a may access the file a
+ * refers to with open mode p (OREAD, OWRITE, ORDWR or OEXEC, optionally
+ * or'ed with OTRUNC, which additionally requires write permission).
+ *
+ * If inodeout is not nil it receives the raw inode of the file; it is
+ * zeroed when the inode cannot be read.
+ *
+ * Returns 1 when access is granted and 0 when it is denied or the file
+ * cannot be looked up (errstr is set in the latter case).  Every caller
+ * treats the result as a boolean, so failures must not be truthy.
+ */
+static int
+haveperm(Aux *a, int p, struct ext4_inode *inodeout)
+{
+	struct ext4_inode inode;
+	u32int ino, id;
+	int m, fm, trunc;
+	Group *g;
+	char *s;
+	if(inodeout != nil)
+		memset(inodeout, 0, sizeof(*inodeout));
+	/* remember OTRUNC now: p is replaced by access bits below */
+	trunc = p & OTRUNC;
+	switch(p & 3){
+	case OREAD:
+		p = AREAD;
+		break;
+	case OWRITE:
+		p = AWRITE;
+		break;
+	case ORDWR:
+		p = AREAD|AWRITE;
+		break;
+	case OEXEC:
+		p = AEXEC;
+		break;
+	default:
+		return 0;
+	}
+	if(trunc)
+		p |= AWRITE;
+	if((s = fullpath(a)) == nil)
+		return 0;
+	if(ext4_raw_inode_fill(s, &ino, &inode) != 0){
+		werrstr("%s: %r", s);
+		free(s);
+		return 0;
+	}
+	free(s);
+	if(inodeout != nil)
+		memmove(inodeout, &inode, sizeof(inode));
+	fm = ext4_inode_get_mode(a->p->sb, &inode);
+	/* other */
+	m = fm & 7;
+	if((p & m) == p)
+		return 1;
+	/* owner: its bits are added to the ones granted so far */
+	id = ext4_inode_get_uid(&inode);
+	if(a->uid == Root || ((g = findgroupid(&a->p->groups, id)) != nil && ingroup(g, a->uid))){
+		m |= (fm >> 6) & 7;
+		if((p & m) == p)
+			return 1;
+	}
+	/* group */
+	id = ext4_inode_get_gid(&inode);
+	if(a->uid == Root || ((g = findgroupid(&a->p->groups, id)) != nil && ingroup(g, a->uid))){
+		m |= (fm >> 3) & 7;
+		if((p & m) == p)
+			return 1;
+	}
+	return 0;
+}

+/*
+ * Tattach: open the partition named by aname and set up the Aux of the
+ * root fid.  The user becomes Root when -R was given or when uname is
+ * not found in the partition's group list.
+ */
+static void
+rattach(Req *r)
+{
+	char err[ERRMAX];
+	Aux *a;
+	a = calloc(1, sizeof(*a));
+	if(a == nil){
+		respond(r, "memory");
+		return;
+	}
+	a->p = openpart(r->ifcall.aname, &opts);
+	if(a->p == nil){
+		free(a);
+		rerrstr(err, sizeof(err));
+		respond(r, err);
+		return;
+	}
+	/* findgroup stores the user's id in a->uid when the name is known */
+	if(opts.asroot || findgroup(&a->p->groups, r->ifcall.uname, &a->uid) == nil)
+		a->uid = Root;
+	incref(a->p);
+	a->type = Adir;
+	a->path = strdup("");
+	r->fid->qid = a->p->qidmask;
+	r->ofcall.qid = a->p->qidmask;
+	r->fid->aux = a;
+	respond(r, nil);
+}

+/*
+ * Translate a 9P open mode (and, for create, the DMEXCL/DMAPPEND bits
+ * of perm) into the O_* flags expected by ext4_fopen2.  creat adds
+ * O_CREAT.  OREAD and OEXEC add no access flag.
+ */
+static u32int
+toext4mode(u32int mode, u32int perm, int creat)
+{
+	u32int e;
+	mode &= ~OCEXEC;
+	e = (mode & OTRUNC) ? O_TRUNC : 0;
+	switch(mode & 3){
+	case OWRITE:
+		e |= O_WRONLY;
+		break;
+	case ORDWR:
+		e |= O_RDWR;
+		break;
+	}
+	if(creat)
+		e |= O_CREAT;
+	if(perm & DMEXCL)
+		e |= O_EXCL;
+	if(perm & DMAPPEND)
+		e |= O_APPEND;
+	return e;
+}

+/*
+ * Topen: check permissions, then open the directory or file the fid
+ * refers to and keep the handle in its Aux.  Directories may only be
+ * opened with OREAD.  The path is opened as is; links are not resolved.
+ */
+static void
+ropen(Req *r)
+{
+	char *path;
+	int res;
+	Aux *a;
+	a = r->fid->aux;
+	if(a->type == Adir){
+		if(r->ifcall.mode != OREAD || !haveperm(a, r->ifcall.mode, nil)){
+			respond(r, Eperm);
+			return;
+		}
+		if(a->dir != nil){
+			respond(r, "double open");
+			return;
+		}
+		a->dir = malloc(sizeof(*a->dir));
+		if(a->dir == nil){
+			respond(r, "memory");
+			return;
+		}
+		path = smprint("%M/%s", a->p, a->path);
+		if(path == nil){
+			free(a->dir);
+			a->dir = nil;
+			respond(r, "memory");
+			return;
+		}
+		res = ext4_dir_open(a->dir, path);
+		free(path);
+		if(res != 0){
+			free(a->dir);
+			a->dir = nil;
+			responderror(r);
+			return;
+		}
+	}else if(a->type == Afile){
+		if(!haveperm(a, r->ifcall.mode, nil)){
+			respond(r, Eperm);
+			return;
+		}
+		if(a->file != nil){
+			respond(r, "double open");
+			return;
+		}
+		a->file = malloc(sizeof(*a->file));
+		if(a->file == nil){
+			respond(r, "memory");
+			return;
+		}
+		path = smprint("%M/%s", a->p, a->path);
+		if(path == nil){
+			free(a->file);
+			a->file = nil;
+			respond(r, "memory");
+			return;
+		}
+		res = ext4_fopen2(a->file, path, toext4mode(r->ifcall.mode, 0, 0));
+		free(path);
+		if(res != 0){
+			free(a->file);
+			a->file = nil;
+			responderror(r);
+			return;
+		}
+	}
+	r->ofcall.iounit = 0;
+	respond(r, nil);
+}

+/*
+ * Tcreate: create a file or directory named ifcall.name inside the
+ * directory the fid refers to, open it, and make the fid refer to it.
+ *
+ * The permissions follow open(5): perm is masked with the parent
+ * directory's bits, using 0666 for files and 0777 for directories.
+ * The fid's Aux is only modified once everything has succeeded; on a
+ * late failure the new entry is removed again and the handle closed.
+ */
+static void
+rcreate(Req *r)
+{
+	u32int perm, dirperm, t;
+	struct ext4_inode inode;
+	char err[ERRMAX];
+	ext4_file *file;
+	ext4_dir *dir;
+	char *s, *q;
+	int mkdir, res;
+	long tm;
+	Aux *a;
+	a = r->fid->aux;
+	s = nil;
+	file = nil;
+	dir = nil;
+	if(a->file != nil || a->dir != nil){
+		werrstr("double create");
+		goto error;
+	}
+	/* a negative result is a lookup failure, not a grant */
+	if((res = haveperm(a, OWRITE, &inode)) <= 0){
+		if(res == 0)
+			werrstr(Eperm);
+		goto error;
+	}
+	/* first make sure this is a directory */
+	t = ext4_inode_type(a->p->sb, &inode);
+	if(t != EXT4_INODE_MODE_DIRECTORY){
+		werrstr("create in non-directory");
+		goto error;
+	}
+	if((s = fullpath(a)) == nil)
+		goto error;
+	if(ext4_mode_get(s, &dirperm) != 0)
+		goto error;
+	/* check if the entry already exists */
+	if((q = smprint("%s/%s", s, r->ifcall.name)) == nil){
+		werrstr("memory");
+		goto error;
+	}
+	free(s);
+	s = q;
+	cleanname(s);
+	if(ext4_inode_exist(s, EXT4_DE_UNKNOWN) == 0){
+		werrstr("file already exists");
+		goto error;
+	}
+	mkdir = (r->ifcall.perm & DMDIR) != 0;
+	/* open(5): directories inherit through 0777, files through 0666 */
+	perm = mkdir ? 0777 : 0666;
+	perm = r->ifcall.perm & (~perm | (dirperm & perm));
+	if(mkdir){
+		if(ext4_dir_mk(s) != 0)
+			goto error;
+		if((dir = malloc(sizeof(*dir))) == nil){
+			werrstr("memory");
+			goto ext4errorrm;
+		}
+		if(ext4_dir_open(dir, s) < 0){
+			free(dir);
+			dir = nil;
+			goto ext4errorrm;
+		}
+	}else{
+		if((file = malloc(sizeof(*file))) == nil){
+			werrstr("memory");
+			goto error;
+		}
+		if(ext4_fopen2(file, s, toext4mode(r->ifcall.mode, perm, 1)) < 0){
+			free(file);
+			file = nil;
+			goto error;
+		}
+	}
+	if(ext4_mode_set(s, perm) < 0)
+		goto ext4errorrm;
+	ext4_owner_set(s, a->uid, a->uid);
+	tm = time(nil);
+	ext4_mtime_set(s, tm);
+	ext4_ctime_set(s, tm);
+	/* success: only now switch the fid over to the new entry */
+	if(mkdir){
+		a->type = Adir;
+		a->dir = dir;
+		r->fid->qid.path = a->p->qidmask.path | dir->f.inode;
+		r->fid->qid.type = QTDIR;
+	}else{
+		a->type = Afile;
+		a->file = file;
+		r->fid->qid.path = a->p->qidmask.path | file->inode;
+		r->fid->qid.type = 0;
+	}
+	r->fid->qid.vers = 0;
+	r->ofcall.qid = r->fid->qid;
+	free(a->path);
+	a->path = strdup(strchr(s+1, '/')+1);
+	free(s);
+	r->ofcall.iounit = 0;
+	respond(r, nil);
+	return;
+ext4errorrm:
+	/* keep the original error; the cleanup calls may overwrite errstr */
+	rerrstr(err, sizeof(err));
+	if(dir != nil){
+		ext4_dir_close(dir);
+		free(dir);
+	}
+	if(file != nil){
+		ext4_fclose(file);
+		free(file);
+	}
+	if(mkdir)
+		ext4_dir_rm(s);
+	else
+		ext4_fremove(s);
+	werrstr("%s", err);
+error:
+	free(s);
+	responderror(r);
+}

+/*
+ * Fill dir with the stat data of a file on a's partition.
+ * With path == nil the file is the one a itself refers to; otherwise
+ * path names an entry inside the directory a refers to.
+ * The path goes through linkresolve first; in link mode Lhide a symlink
+ * is reported as an error.
+ * The name, uid and gid strings are allocated with estrdup9p.
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+dirfill(Dir *dir, Aux *a, char *path)
+{
+	struct ext4_inode inode;
+	u32int t, ino, id;
+	char tmp[16];
+	char *s, *q;
+	Group *g;
+	memset(dir, 0, sizeof(*dir));
+	if(path == nil){
+		path = a->path;
+		s = smprint("%M/%s", a->p, a->path);
+	}else{
+		if(*a->path == 0 && *path == 0)
+			path = "/";
+		/* avoid a doubled slash when a is the partition root */
+		s = smprint("%M%s%s/%s", a->p, *a->path ? "/" : "", a->path, path);
+	}
+	/* linkresolve takes ownership of s */
+	if((s = linkresolve(a, s, nil)) == nil)
+		return -1;
+	if(ext4_raw_inode_fill(s, &ino, &inode) < 0){
+		werrstr("inode: %s: %r", s);
+		free(s);
+		return -1;
+	}
+	t = ext4_inode_type(a->p->sb, &inode);
+	if(opts.linkmode == Lhide && t == EXT4_INODE_MODE_SOFTLINK){
+		werrstr("softlinks resolving disabled");
+		free(s);
+		return -1;
+	}
+	/* keep only the nine rwx permission bits */
+	dir->mode = ext4_inode_get_mode(a->p->sb, &inode) & 0x1ff;
+	dir->qid.path = a->p->qidmask.path | ino;
+	dir->qid.vers = ext4_inode_get_generation(&inode);
+	dir->qid.type = 0;
+	if(t == EXT4_INODE_MODE_DIRECTORY){
+		dir->qid.type |= QTDIR;
+		dir->mode |= DMDIR;
+	}else
+		dir->length = ext4_inode_get_size(a->p->sb, &inode);
+	if(ext4_inode_get_flags(&inode) & EXT4_INODE_FLAG_APPEND){
+		dir->qid.type |= QTAPPEND;
+		dir->mode |= DMAPPEND;
+	}
+	/* the reported name is the last element of the unresolved path */
+	if((q = strrchr(path, '/')) != nil)
+		path = q+1;
+	dir->name = estrdup9p(path);
+	dir->atime = ext4_inode_get_access_time(&inode);
+	dir->mtime = ext4_inode_get_modif_time(&inode);
+	/* ids without an entry in the group list are reported as numbers */
+	sprint(tmp, "%ud", id = ext4_inode_get_uid(&inode));
+	dir->uid = estrdup9p((g = findgroupid(&a->p->groups, id)) != nil ? g->name : tmp);
+	sprint(tmp, "%ud", id = ext4_inode_get_gid(&inode));
+	dir->gid = estrdup9p((g = findgroupid(&a->p->groups, id)) != nil ? g->name : tmp);
+	free(s);
+	return 0;
+}

+/*
+ * Directory generator passed to dirread9p: fill dir with the n-th
+ * visible entry of the open directory in aux (an Aux).
+ * Entries without a name, "." and "..", and (in link mode Lhide)
+ * symlinks are not visible.  a->doff counts the visible entries
+ * consumed since the last rewind; if the request is not for the next
+ * one in sequence the directory is rewound and scanned from the start.
+ * Returns 0 when dir was filled, -1 at the end of the directory.
+ */
+static int
+dirgen(int n, Dir *dir, void *aux)
+{
+	const ext4_direntry *e;
+	Aux *a;
+	a = aux;
+	if(n == 0 || n != a->doff){
+		ext4_dir_entry_rewind(a->dir);
+		a->doff = 0;
+	}
+	for(;;){
+		do{
+			if((e = ext4_dir_entry_next(a->dir)) == nil)
+				return -1;
+		}while(e->name == nil || strcmp((char*)e->name, ".") == 0 || strcmp((char*)e->name, "..") == 0);
+		if(opts.linkmode == Lhide && e->inode_type == EXT4_DE_SYMLINK)
+			continue;
+		/* skip forward until the n-th visible entry is reached */
+		if(a->doff++ != n)
+			continue;
+		if(dirfill(dir, a, (char*)e->name) == 0)
+			return 0;
+		/* the entry could not be described: do not count it, try the next */
+		a->doff--;
+	}
+}

+/*
+ * Tread: directories are read through dirread9p/dirgen, files through
+ * ext4_fseek + ext4_fread.  A failed seek yields an empty read; a fid
+ * that is not open gets an empty successful reply.
+ */
+static void
+rread(Req *r)
+{
+	usize n;
+	Aux *a;
+	a = r->fid->aux;
+	switch(a->type){
+	case Adir:
+		if(a->dir != nil)
+			dirread9p(r, dirgen, a);
+		break;
+	case Afile:
+		if(a->file == nil)
+			break;
+		n = 0;
+		if(ext4_fseek(a->file, r->ifcall.offset, 0) == 0 &&
+		   ext4_fread(a->file, r->ofcall.data, r->ifcall.count, &n) < 0){
+			responderror(r);
+			return;
+		}
+		r->ofcall.count = n;
+		break;
+	}
+	respond(r, nil);
+}

+/*
+ * Twrite: write ifcall.data at ifcall.offset.  When the offset lies
+ * beyond the end of the file, the gap is filled with zeros first, in
+ * chunks of at most sizeof(zero) bytes.  Directories cannot be written.
+ */
+static void
+rwrite(Req *r)
+{
+	usize n, sz;
+	Aux *a;
+	a = r->fid->aux;
+	switch(a->type){
+	case Adir:
+		respond(r, "can't write to dir");
+		return;
+	case Afile:
+		break;
+	default:
+		return;
+	}
+	/* pad the file with zeros up to the requested offset */
+	while(ext4_fsize(a->file) < r->ifcall.offset){
+		ext4_fseek(a->file, 0, 2);
+		sz = MIN(r->ifcall.offset-ext4_fsize(a->file), sizeof(zero));
+		if(ext4_fwrite(a->file, zero, sz, &n) < 0){
+			responderror(r);
+			return;
+		}
+	}
+	if(ext4_fseek(a->file, r->ifcall.offset, 0) < 0 ||
+	   ext4_fwrite(a->file, r->ifcall.data, r->ifcall.count, &n) < 0){
+		responderror(r);
+		return;
+	}
+	r->ofcall.count = n;
+	respond(r, nil);
+}

+/*
+ * Tremove: remove the file or (empty) directory the fid refers to.
+ * Only Root, or a user whose id matches a group-list entry for the
+ * file's uid, may remove.  The path is used as is, without resolving
+ * links.
+ */
+static void
+rremove(Req *r)
+{
+	struct ext4_inode inode;
+	const ext4_direntry *e;
+	u32int ino, t, empty;
+	ext4_dir dir;
+	Group *g;
+	char *s;
+	Aux *a;
+	a = r->fid->aux;
+	/* do not resolve links here as most likely it's JUST the link we want to remove */
+	if((s = smprint("%M/%s", a->p, a->path)) == nil){
+		werrstr("memory");
+		goto error;
+	}
+	if(ext4_raw_inode_fill(s, &ino, &inode) < 0)
+		goto error;
+	if(a->uid == Root || ((g = findgroupid(&a->p->groups, ext4_inode_get_uid(&inode))) != nil && g->id == a->uid)){
+		t = ext4_inode_type(a->p->sb, &inode);
+		if(t == EXT4_INODE_MODE_DIRECTORY && ext4_dir_open(&dir, s) == 0){
+			/* the directory is empty if it holds nothing but "." and ".." */
+			for(empty = 1; empty;){
+				if((e = ext4_dir_entry_next(&dir)) == nil)
+					break;
+				empty = e->name == nil || strcmp((char*)e->name, ".") == 0 || strcmp((char*)e->name, "..") == 0;
+			}
+			ext4_dir_close(&dir);
+			if(!empty){
+				werrstr("directory not empty");
+				goto error;
+			}else if(ext4_dir_rm(s) < 0)
+				goto error;
+		/* NOTE(review): a directory that fails to open also ends up here */
+		}else if(ext4_fremove(s) < 0)
+			goto error;
+	}else{
+		werrstr(Eperm);
+		goto error;
+	}
+	free(s);
+	respond(r, nil);
+	return;
+error:
+	free(s);
+	responderror(r);
+}

+/* Tstat: describe the file the fid refers to via dirfill. */
+static void
+rstat(Req *r)
+{
+	Aux *a;
+	a = r->fid->aux;
+	if(dirfill(&r->d, a, nil) == 0){
+		respond(r, nil);
+		return;
+	}
+	responderror(r);
+}

+/*
+ * Twstat: rename, truncate, and change the mode, mtime and gid of the
+ * file the fid refers to.  All permission checks are done first, then
+ * the changes are applied in that order.
+ *
+ * Rules enforced here: the root directory cannot be changed; the owner
+ * cannot be changed; truncation needs write permission on a regular
+ * file; renaming needs write permission on the parent directory; mode,
+ * mtime and gid changes need ownership (Root counts as owner); the new
+ * gid must name a group the user belongs to.
+ */
+static void
+rwstat(Req *r)
+{
+	int res, isdir, wrperm, isowner, dorename, n;
+	struct ext4_inode inode;
+	char *old, *new, *parent, *s;
+	u32int uid, gid;
+	ext4_file f;
+	Aux *a, o;
+	Group *g;
+	a = r->fid->aux;
+	old = nil;
+	new = nil;
+	gid = ~0;
+	dorename = r->d.name != nil && r->d.name[0] != 0;
+	/* can't do anything to root, can't change the owner */
+	if(a->path[0] == 0 || (r->d.uid != nil && r->d.uid[0] != 0)){
+		werrstr(Eperm);
+		goto error;
+	}
+	if((old = smprint("%M/%s", a->p, a->path)) == nil){
+		werrstr("memory");
+		goto error;
+	}
+	new = old;
+	/* never look at an inode that was not filled in */
+	memset(&inode, 0, sizeof(inode));
+	if((wrperm = haveperm(a, OWRITE, &inode)) < 0)
+		goto error;
+	uid = ext4_inode_get_uid(&inode);
+	/* the requesting user (not the file) has to be Root or the owner */
+	isowner = a->uid == Root || a->uid == uid;
+	/* permission to truncate */
+	isdir = ext4_inode_type(a->p->sb, &inode) == EXT4_INODE_MODE_DIRECTORY;
+	if(r->d.length >= 0 && (!wrperm || isdir || !ext4_inode_can_truncate(a->p->sb, &inode))){
+		werrstr(Eperm);
+		goto error;
+	}
+	/* permission to rename */
+	if(dorename){
+		if((s = strrchr(old, '/')) == nil){
+			werrstr("botched name");
+			goto error;
+		}
+		n = s - old;
+		if((new = malloc(n + 1 + strlen(r->d.name) + 1)) == nil){
+			werrstr("memory");
+			goto error;
+		}
+		memmove(new, old, n);
+		new[n++] = '/';
+		strcpy(new+n, r->d.name);
+		/*
+		 * check parent write permission: haveperm wants a path
+		 * relative to the partition root, so strip the last
+		 * element of a->path ("" is the root itself)
+		 */
+		if((parent = strdup(a->path)) == nil){
+			werrstr("memory");
+			goto error;
+		}
+		if((s = strrchr(parent, '/')) != nil)
+			*s = 0;
+		else
+			parent[0] = 0;
+		o = *a;
+		o.path = parent;
+		res = haveperm(&o, OWRITE, nil);
+		free(parent);
+		if(res <= 0){
+			if(res == 0)
+				werrstr(Eperm);
+			goto error;
+		}
+	}
+	/* permission to change mode */
+	if(r->d.mode != ~0){
+		/* has to be owner and can't change dir bit */
+		if(!isowner || (!!isdir != !!(r->d.mode & DMDIR))){
+			werrstr(Eperm);
+			goto error;
+		}
+	}
+	/* permission to change mtime */
+	if(r->d.mtime != ~0 && !isowner){
+		werrstr(Eperm);
+		goto error;
+	}
+	/* permission to change gid */
+	if(r->d.gid != nil && r->d.gid[0] != 0){
+		/* has to be the owner, group has to exist, must be in that group */
+		if(!isowner || (g = findgroup(&a->p->groups, r->d.gid, &gid)) == nil || !ingroup(g, a->uid)){
+			werrstr(Eperm);
+			goto error;
+		}
+	}
+	/* done checking permissions, now apply all the changes and hope it all works */
+	/* rename */
+	if(dorename){
+		if(ext4_frename(old, new) < 0)
+			goto error;
+		/* from here on old and new both name the renamed file */
+		free(old);
+		old = new;
+		free(a->path);
+		a->path = strdup(strchr(old+1, '/')+1);
+	}
+	/* truncate */
+	if(r->d.length >= 0){
+		if(ext4_fopen2(&f, new, toext4mode(OWRITE, 0, 0)) < 0)
+			goto error;
+		res = ext4_ftruncate(&f, r->d.length);
+		ext4_fclose(&f);
+		if(res != 0)
+			goto error;
+	}
+	/* mode */
+	if(r->d.mode != ~0 && ext4_mode_set(new, r->d.mode & 0x1ff) < 0)
+		goto error;
+	/* mtime */
+	if(r->d.mtime != ~0 && ext4_mtime_set(new, r->d.mtime) < 0)
+		goto error;
+	/* gid */
+	if(r->d.gid != nil && r->d.gid[0] != 0 && ext4_owner_set(new, uid, gid) < 0)
+		goto error;
+	free(old);
+	if(new != old)
+		free(new);
+	respond(r, nil);
+	return;
+error:
+	free(old);
+	if(new != old)
+		free(new);
+	responderror(r);
+}

+/*
+ * Walk the fid one element: verify that it currently refers to a
+ * directory the user may search, then look up name inside it (links
+ * are resolved by linkresolve) and update the fid's path, type and qid.
+ * Returns nil on success or an error string; the string may live in a
+ * static buffer.
+ */
+static char *
+rwalk1(Fid *fid, char *name, Qid *qid)
+{
+	static char errbuf[ERRMAX];
+	struct ext4_inode inode;
+	u32int ino, t;
+	Aux *a, dir;
+	char *s, *q;
+	a = fid->aux;
+	/* try walking to the real file first */
+	if((s = fullpath(a)) == nil){
+		/* else try link itself. might want to just remove it anyway */
+		if((s = smprint("%M/%s", a->p, a->path)) == nil)
+			return "memory";
+	}
+	if(ext4_raw_inode_fill(s, &ino, &inode) < 0)
+		goto error;
+	t = ext4_inode_type(a->p->sb, &inode);
+	if(t != EXT4_INODE_MODE_DIRECTORY){
+		free(s);
+		return "not a directory";
+	}
+	/* check search permission on the (resolved) directory */
+	dir = *a;
+	dir.path = strchr(s+1, '/')+1;	/* skip the mount point prefix */
+	if(haveperm(&dir, OEXEC, nil) <= 0){
+		free(s);
+		return Eperm;
+	}
+	q = s;
+	s = smprint("%s/%s", q, name);
+	free(q);
+	if(s == nil)
+		return "memory";
+	cleanname(strchr(s+1, '/'));
+	/* linkresolve has already freed s when it returns nil */
+	if((q = linkresolve(a, s, nil)) == nil){
+		s = nil;
+		goto error;
+	}
+	s = q;
+	if(ext4_raw_inode_fill(s, &ino, &inode) < 0)
+		goto error;
+	t = ext4_inode_type(a->p->sb, &inode);
+	if(opts.linkmode == Lhide && t == EXT4_INODE_MODE_SOFTLINK){
+		free(s);
+		return "not found";
+	}
+	qid->type = 0;
+	qid->path = a->p->qidmask.path | ino;
+	qid->vers = ext4_inode_get_generation(&inode);
+	if(t == EXT4_INODE_MODE_DIRECTORY){
+		qid->type |= QTDIR;
+		a->type = Adir;
+	}else
+		a->type = Afile;
+	if(ext4_inode_get_flags(&inode) & EXT4_INODE_FLAG_APPEND)
+		qid->type |= QTAPPEND;
+	free(a->path);
+	a->path = strdup(strchr(s+1, '/')+1);
+	free(s);
+	fid->qid = *qid;
+	return nil;
+error:
+	free(s);
+	rerrstr(errbuf, sizeof(errbuf));
+	return errbuf;
+}

+/*
+ * Clone the Aux of oldfid for newfid.  The clone gets its own copy of
+ * the path and a reference on the partition, but no open handle.
+ */
+static char *
+rclone(Fid *oldfid, Fid *newfid)
+{
+	Aux *a, *c;
+	a = oldfid->aux;
+	c = calloc(1, sizeof(*c));
+	if(c == nil)
+		return "memory";
+	*c = *a;
+	c->path = strdup(a->path);
+	/* file and dir share a union; the clone starts closed */
+	c->file = nil;
+	c->dir = nil;
+	incref(c->p);
+	newfid->aux = c;
+	return nil;
+}

+/*
+ * Release everything attached to a fid: close and free an open handle,
+ * drop the partition reference (closing the partition when it was the
+ * last one) and free the Aux itself.
+ */
+static void
+rdestroyfid(Fid *fid)
+{
+	Aux *a;
+	if((a = fid->aux) == nil)
+		return;
+	fid->aux = nil;
+	switch(a->type){
+	case Adir:
+		if(a->dir != nil){
+			ext4_dir_close(a->dir);
+			free(a->dir);
+		}
+		break;
+	case Afile:
+		if(a->file != nil){
+			ext4_fclose(a->file);
+			free(a->file);
+		}
+		break;
+	}
+	if(decref(a->p) < 1)
+		closepart(a->p);
+	free(a->path);
+	free(a);
+}

+/*
+ * Note handler: notes starting with "sys:" are not handled (return 0);
+ * for any other note all partitions are closed, fd 0 is closed and the
+ * note is reported as handled (return 1).
+ */
+static int
+note(void *, char *s)
+{
+	if(strncmp(s, "sys:", 4) == 0)
+		return 0;
+	closeallparts();
+	close(0);
+	return 1;
+}

+/*
+ * Command channel, run as its own proc.  One end of a pipe is posted
+ * as "#s/<srvname>.cmd"; the other end becomes fd 0 and is read line
+ * by line.  Commands: "stats" or "df", "halt", "sync".
+ */
+static void
+cmdsrv(void *)
+{
+	char s[32], *c, *a[4];
+	int f, p[2], n;
+	Biobuf b;
+	if(pipe(p) < 0)
+		sysfatal("%r");
+	snprint(s, sizeof(s), "#s/%s.cmd", srvname);
+	/* on failure remove the existing file once and retry */
+	if((f = create(s, ORCLOSE|OWRITE, 0660)) < 0){
+		remove(s);
+		if((f = create(s, ORCLOSE|OWRITE, 0660)) < 0)
+			sysfatal("%r");
+	}
+	/* post the pipe end by writing its fd number to the srv file */
+	if(fprint(f, "%d", p[0]) < 1)
+		sysfatal("srv write");
+	dup(p[1], 0);
+	close(p[1]);
+	close(p[0]);
+	Binit(&b, 0, OREAD);
+	for(; (c = Brdstr(&b, '\n', 1)) != nil; free(c)){
+		if((n = tokenize(c, a, nelem(a))) < 1)
+			continue;
+		USED(n);
+		if(strcmp(a[0], "stats") == 0 || strcmp(a[0], "df") == 0){
+			statallparts();
+		}else if(strcmp(a[0], "halt") == 0){
+			closeallparts();
+			close(0);
+			threadexitsall(nil);
+		}else if(strcmp(a[0], "sync") == 0){
+			syncallparts();
+		}else{
+			print("unknown command: %s\n", a[0]);
+		}
+	}
+}

+/* Srv.start hook: install the note handler and start the command proc. */
+static void
+rstart(Srv *)
+{
+	threadnotify(note, 1);
+	proccreate(cmdsrv, nil, mainstacksize);
+}

+/* Srv.end hook: close all partitions and fd 0, then exit all threads. */
+static void
+rend(Srv *)
+{
+	closeallparts();
+	close(0);
+	threadexitsall(nil);
+}

+/* 9P request handlers and hooks handed to threadsrv/threadpostsrv. */
+static Srv fs = {
+	.attach = rattach,
+	.open = ropen,
+	.create = rcreate,
+	.read = rread,
+	.write = rwrite,
+	.remove = rremove,
+	.stat = rstat,
+	.wstat = rwstat,
+	.walk1 = rwalk1,
+	.clone = rclone,
+	.destroyfid = rdestroyfid,
+	.start = rstart,
+	.end = rend,
+};

+/* Print both synopses (serving and mkfs) on fd 2 and exit with status "usage". */
+static void
+usage(void)
+{
+	fprint(2, "usage: %s [-Clrs] [-g groupfile] [-R uid] [srvname]\n", argv0);
+	fprint(2, "mkfs:  %s -M (2|3|4) [-L label] [-b blksize] [-N numinodes] [-I inodesize] device\n", argv0);
+	threadexitsall("usage");
+}

+/*
+ * Entry point.  The flags select one of two modes, tracked in
+ * opts.fstype: serving flags jump to nomkfs, which forces fstype to 0
+ * and rejects a preceding -M; mkfs flags jump to yesmkfs, which
+ * requires a preceding -M.  -d is accepted in both modes.
+ * In mkfs mode the single argument is opened with openpart and closed
+ * again; in serve mode the 9P service is run on stdio (-s) or posted
+ * under srvname.
+ */
+void
+threadmain(int argc, char **argv)
+{
+	char *gr;
+	vlong sz;
+	int f, stdio;
+	rfork(RFNOTEG);
+	stdio = 0;
+	ARGBEGIN{
+	case 'D':
+		chatty9p++;
+		/* shared tail of every serve-mode flag */
+nomkfs:
+		if(opts.fstype > 0)
+			usage();
+		opts.fstype = 0;
+		break;
+	case 'd':
+		ext4_dmask_set(strtoul(EARGF(usage()), nil, 0));
+		break;
+	case 'C':
+		opts.cachewb = 1;
+		goto nomkfs;
+	case 'l':
+		opts.linkmode = Lresolve;
+		goto nomkfs;
+	case 'g':
+		/* read the whole group file into a NUL-terminated buffer */
+		gr = EARGF(usage());
+		if((f = open(gr, OREAD)) < 0)
+			sysfatal("%r");
+		sz = seek(f, 0, 2);
+		if(sz < 0)
+			sysfatal("%s: invalid group file", gr);
+		if((opts.group = malloc(sz+1)) == nil)
+			sysfatal("memory");
+		seek(f, 0, 0);
+		if(readn(f, opts.group, sz) != sz)
+			sysfatal("%s: read failed", gr);
+		close(f);
+		opts.group[sz] = 0;
+		goto nomkfs;
+	case 'R':
+		opts.asroot = 1;
+		Root = atoll(EARGF(usage()));
+		goto nomkfs;
+	case 'r':
+		opts.rdonly = 1;
+		goto nomkfs;
+	case 's':
+		stdio = 1;
+		goto nomkfs;
+	case 'M':
+		/* fstype 0 means a serve-mode flag was already seen */
+		if(!opts.fstype)
+			usage();
+		opts.fstype = atoi(EARGF(usage()));
+		if(opts.fstype < 2 || opts.fstype > 4)
+			usage();
+		break;
+	case 'b':
+		opts.blksz = atoi(EARGF(usage()));
+		if(opts.blksz != 1024 && opts.blksz != 2048 && opts.blksz != 4096)
+			usage();
+		/* shared tail of every mkfs flag: -M must come first */
+yesmkfs:
+		if(opts.fstype < 1)
+			usage();
+		break;
+	case 'L':
+		opts.label = EARGF(usage());
+		goto yesmkfs;
+	case 'I':
+		/* at least 128 and a power of two */
+		opts.inodesz = atoi(EARGF(usage()));
+		if(opts.inodesz < 128 || ((opts.inodesz-1) & opts.inodesz) != 0)
+			usage();
+		goto yesmkfs;
+	case 'N':
+		opts.ninode = atoi(EARGF(usage()));
+		if(opts.ninode < 1)
+			usage();
+		goto yesmkfs;
+	default:
+		usage();
+	}ARGEND
+	if(opts.fstype > 1){
+		/* mkfs mode: the work happens inside openpart */
+		if(argc != 1)
+			usage();
+		if(openpart(argv[0], &opts) == nil)
+			sysfatal("%r");
+		closeallparts();
+		threadexitsall(nil);
+	}else{
+		if(!stdio && argc == 1)
+			srvname = *argv;
+		else if(argc != 0)
+			usage();
+		if(stdio){
+			fs.infd = 0;
+			fs.outfd = 1;
+			threadsrv(&fs);
+		}else
+			threadpostsrv(&fs, srvname);
+		threadexits(nil);
+	}
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/group.c

@@ -1,0 +1,130 @@

+#include <u.h>

+#include <libc.h>

+#include "group.h"

+/*
+ * Parse a group file given as one string (raw) into gs.
+ * Each line is split at ':' and must have at least three fields, with
+ * a non-empty name in the first and a numeric id (0..0xffffffff) in
+ * the third.  An optional fourth field is a comma separated member
+ * list.  Any other line, including an empty one, is an error.
+ * gs keeps a private copy of raw; all names point into that copy.
+ * Returns 0 on success; on failure gs is freed, errstr is set and -1
+ * is returned.
+ */
+int
+loadgroups(Groups *gs, char *raw)
+{
+	char *m, *s, *e, *a[5], *ide;
+	Group *g, *memb;
+	int line, n, k;
+	vlong id;
+	memset(gs, 0, sizeof(*gs));
+	if((gs->raw = strdup(raw)) == nil)
+		goto error;
+	line = 1;
+	for(s = gs->raw; *s; s = e+1, line++){
+		/* cut the current line out of the buffer */
+		if((e = strchr(s, '\n')) != nil)
+			*e = 0;
+		if((n = getfields(s, a, nelem(a), 1, ":")) >= 3 && strlen(a[0]) > 0 && strlen(a[2]) > 0){
+			id = strtoll(a[2], &ide, 0);
+			if(id < 0 || id > 0xffffffff || *ide != 0){
+				werrstr("invalid uid: %s", a[2]);
+				goto error;
+			}
+			if((g = realloc(gs->g, (gs->ng+1)*sizeof(Group))) == nil)
+				goto error;
+			gs->g = g;
+			g += gs->ng++;
+			memset(g, 0, sizeof(*g));
+			g->id = id;
+			g->name = a[0];
+			/* split the member list in place, one Group entry per name */
+			for(m = a[3]; n > 3 && *m; *m++ = 0){
+				if((memb = realloc(g->memb, (g->nmemb+1)*sizeof(Group))) == nil)
+					goto error;
+				g->memb = memb;
+				memb += g->nmemb++;
+				memset(memb, 0, sizeof(*memb));
+				memb->name = m;
+				if((m = strchr(m, ',')) == nil)
+					break;
+			}
+		}else{
+			werrstr("line %d: invalid record", line);
+			goto error;
+		}
+		/* last line had no trailing newline */
+		if(e == nil)
+			break;
+	}
+	/* second pass: resolve member names to ids (~0 when unknown) */
+	g = gs->g;
+	for(n = 0; n < gs->ng; n++, g++){
+		for(k = 0, memb = g->memb; k < g->nmemb; k++, memb++)
+			findgroup(gs, memb->name, &memb->id);
+	}
+	return 0;
+error:
+	werrstr("togroups: %r");
+	freegroups(gs);
+	return -1;
+}

+/*
+ * Free everything loadgroups allocated for gs: the member arrays, the
+ * group array and the private copy of the raw text.  gs itself is not
+ * freed.
+ */
+void
+freegroups(Groups *gs)
+{
+	int i;
+	for(i = gs->ng; i-- > 0;)
+		free(gs->g[i].memb);
+	free(gs->g);
+	free(gs->raw);
+}

+/*
+ * Look a group up by name.  Returns the entry, or nil if there is
+ * none.  If id is not nil it receives the group's id, or ~0 when the
+ * name is unknown.
+ */
+Group *
+findgroup(Groups *gs, char *name, u32int *id)
+{
+	Group *found;
+	int i;
+	found = nil;
+	for(i = 0; i < gs->ng; i++){
+		if(strcmp(gs->g[i].name, name) == 0){
+			found = &gs->g[i];
+			break;
+		}
+	}
+	if(id != nil)
+		*id = found != nil ? found->id : ~0;
+	return found;
+}

+/* Look a group up by numeric id; returns the first match or nil. */
+Group *
+findgroupid(Groups *gs, u32int id)
+{
+	int i;
+	for(i = 0; i < gs->ng; i++){
+		if(gs->g[i].id == id)
+			return &gs->g[i];
+	}
+	return nil;
+}

+/*
+ * Return 1 if id is the group's own id or the id of one of its
+ * members, 0 otherwise.
+ */
+int
+ingroup(Group *g, u32int id)
+{
+	Group *memb;
+	int i;
+	if(g->id == id)
+		return 1;
+	memb = g->memb;
+	for(i = 0; i < g->nmemb; i++){
+		if(memb[i].id == id)
+			return 1;
+	}
+	return 0;
+}

--- /dev/null

+++ b/sys/src/cmd/ext4srv/group.h

@@ -1,0 +1,21 @@

+typedef struct Group Group;
+typedef struct Groups Groups;
+/* One group file record; also used for the entries of a member list. */
+struct Group {
+	u32int id;	/* numeric id; ~0 for a member whose name is unknown */
+	char *name;	/* points into Groups.raw, not separately allocated */
+	Group *memb;	/* member list (only name and id are filled in) */
+	int nmemb;	/* number of entries in memb */
+};
+/* A parsed group file. */
+struct Groups {
+	char *raw;	/* private copy of the file text, cut up in place */
+	Group *g;	/* array of records */
+	int ng;		/* number of records in g */
+};
+/* parse raw into gs; returns 0, or -1 with errstr set */
+int loadgroups(Groups *gs, char *raw);
+/* free what loadgroups allocated (not gs itself) */
+void freegroups(Groups *gs);
+/* look up by name; *id (if not nil) gets the id, or ~0 when not found */
+Group *findgroup(Groups *gs, char *name, u32int *id);
+/* look up by numeric id */
+Group *findgroupid(Groups *gs, u32int id);
+/* is id the group itself or one of its members? */
+int ingroup(Group *g, u32int id);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4.h

@@ -1,0 +1,529 @@

+#pragma once

+#include "ext4_types.h"

+#include "ext4_debug.h"

+#include "ext4_blockdev.h"

+#pragma incomplete struct ext4_mountpoint

+/********************************OS LOCK INTERFACE***************************/
+/**@brief   OS dependent lock interface.*/
+struct ext4_lock {
+	/**@brief   Lock access to mount point.*/
+	void (*lock)(void *aux);
+	/**@brief   Unlock access to mount point.*/
+	void (*unlock)(void *aux);
+	/**@brief   Auxiliary pointer (presumably handed to lock/unlock as aux - confirm).*/
+	void *p_user;
+};

+/********************************FILE DESCRIPTOR*****************************/

+/**@brief   File descriptor. */

+typedef struct ext4_file {

+	/**@brief   Mount point handle.*/

+	struct ext4_mountpoint *mp;

+	/**@brief   File inode id.*/

+	u32int inode;

+	/**@brief   Open flags.*/

+	u32int flags;

+	/**@brief   File size.*/

+	u64int fsize;

+	/**@brief   Actual file position.*/

+	u64int fpos;

+} ext4_file;

+/*****************************DIRECTORY DESCRIPTOR***************************/

+/**@brief   Directory entry descriptor. */

+typedef struct ext4_direntry {

+	u32int inode;

+	u16int entry_length;

+	u8int name_length;

+	u8int inode_type;

+	u8int name[255];

+} ext4_direntry;

+/**@brief   Directory descriptor. */

+typedef struct ext4_dir {

+	/**@brief   File descriptor.*/

+	ext4_file f;

+	/**@brief   Current directory entry.*/

+	ext4_direntry de;

+	/**@brief   Next entry offset.*/

+	u64int next_off;

+} ext4_dir;

+/********************************MOUNT OPERATIONS****************************/

+/**@brief   Register block device.

+ *

+ * @param   bd Block device.

+ * @param   dev_name Block device name.

+ *

+ * @return  Standard error code.*/

+int ext4_device_register(struct ext4_blockdev *bd,

+			 const char *dev_name);

+/**@brief   Un-register block device.

+ *

+ * @param   dev_name Block device name.

+ *

+ * @return  Standard error code.*/

+int ext4_device_unregister(const char *dev_name);

+/**@brief   Un-register all block devices.

+ *

+ * @return  Standard error code.*/

+int ext4_device_unregister_all(void);

+/**@brief   Mount a block device with EXT4 partition to the mount point.

+ *

+ * @param   dev_name Block device name (@ref ext4_device_register).

+ * @param   mount_point Mount point, for example:

+ *          -   /

+ *          -   /my_partition/

+ *          -   /my_second_partition/

+ * @param   read_only mount as read-only mode.

+ *

+ * @return Standard error code */

+int ext4_mount(const char *dev_name,

+	       const char *mount_point,

+	       bool read_only);

+/**@brief   Umount operation.

+ *

+ * @param   mount_point Mount point.

+ *

+ * @return  Standard error code */

+int ext4_umount(const char *mount_point);

+/**@brief   Starts journaling. Journaling start/stop functions are transparent

+ *          and might be used on filesystems without journaling support.

+ * @warning Usage:

+ *              ext4_mount("sda1", "/");

+ *              ext4_journal_start("/");

+ *

+ *              //File operations here...

+ *

+ *              ext4_journal_stop("/");

+ *              ext4_umount("/");

+ * @param   mount_point Mount point.

+ *

+ * @return  Standard error code. */

+int ext4_journal_start(const char *mount_point);

+/**@brief   Stops journaling. Journaling start/stop functions are transparent

+ *          and might be used on filesystems without journaling support.

+ *

+ * @param   mount_point Mount point name.

+ *

+ * @return  Standard error code. */

+int ext4_journal_stop(const char *mount_point);

+/**@brief   Journal recovery.

+ * @warning Must be called after @ref ext4_mount.

+ *

+ * @param   mount_point Mount point.

+ *

+ * @return Standard error code. */

+int ext4_recover(const char *mount_point);

+/**@brief   Some of the filesystem stats. */

+struct ext4_mount_stats {

+	u32int inodes_count;

+	u32int free_inodes_count;

+	u64int blocks_count;

+	u64int free_blocks_count;

+	u32int block_size;

+	u32int block_group_count;

+	u32int blocks_per_group;

+	u32int inodes_per_group;

+	char volume_name[16];

+};

+/**@brief   Get file mount point stats.

+ *

+ * @param   mount_point Mount point.

+ * @param   stats Filesystem stats.

+ *

+ * @return Standard error code. */

+int ext4_mount_point_stats(const char *mount_point,

+			   struct ext4_mount_stats *stats);

+/**@brief   Setup OS lock routines.

+ *

+ * @param   mount_point Mount point.

+ * @param   locks  Lock and unlock functions

+ *

+ * @return Standard error code. */

+int ext4_mount_setup_locks(const char *mount_point,

+			   const struct ext4_lock *locks);

+/**@brief   Acquire the filesystem superblock pointer of a mp.

+ *

+ * @param   mount_point Mount point.

+ * @param   sb Superblock handle

+ *

+ * @return Standard error code. */

+int ext4_get_sblock(const char *mount_point, struct ext4_sblock **sb);

+/**@brief   Enable/disable write back cache mode.

+ * @warning Default model of cache is write through. It means that when you do:

+ *

+ *          ext4_fopen(...);

+ *          ext4_fwrite(...);

+ *                           < --- data is flushed to physical drive

+ *

+ *          When you do:

+ *          ext4_cache_write_back(..., 1);

+ *          ext4_fopen(...);

+ *          ext4_fwrite(...);

+ *                           < --- data is NOT flushed to physical drive

+ *          ext4_cache_write_back(..., 0);

+ *                           < --- when write back mode is disabled all

+ *                                 cache data will be flushed

+ * To enable write back mode permanently just call this function

+ * once after ext4_mount (and disable before ext4_umount).

+ *

+ * Some of the function use write back cache mode internally.

+ * If you enable write back mode twice you have to disable it twice

+ * to flush all data:

+ *

+ *      ext4_cache_write_back(..., 1);

+ *      ext4_cache_write_back(..., 1);

+ *

+ *      ext4_cache_write_back(..., 0);

+ *      ext4_cache_write_back(..., 0);

+ *

+ * Write back mode is useful when you want to create a lot of empty

+ * files/directories.

+ *

+ * @param   path Path.

+ * @param   on Enable/disable cache writeback mode.

+ *

+ * @return Standard error code. */

+int ext4_cache_write_back(const char *path, bool on);

+/**@brief   Force cache flush.

+ *

+ * @param   path Path.

+ *

+ * @return  Standard error code. */

+int ext4_cache_flush(const char *path);

+/********************************FILE OPERATIONS*****************************/

+/**@brief   Remove file by path.

+ *

+ * @param   path Path to file.

+ *

+ * @return  Standard error code. */

+int ext4_fremove(const char *path);

+/**@brief   Create a hardlink for a file.

+ *

+ * @param   path Path to file.

+ * @param   hardlink_path Path of hardlink.

+ *

+ * @return  Standard error code. */

+int ext4_flink(const char *path, const char *hardlink_path);

+/**@brief Rename file.

+ * @param path Source.

+ * @param new_path Destination.

+ * @return  Standard error code. */

+int ext4_frename(const char *path, const char *new_path);

+/**@brief   File open function.

+ *

+ * @param   file  File handle.

+ * @param   path  File path, has to start from mount point:/my_partition/file.

+ * @param   flags File open flags.

+ *  |---------------------------------------------------------------|

+ *  |   r or rb                 O_RDONLY                            |

+ *  |---------------------------------------------------------------|

+ *  |   w or wb                 O_WRONLY|O_CREAT|O_TRUNC            |

+ *  |---------------------------------------------------------------|

+ *  |   a or ab                 O_WRONLY|O_CREAT|O_APPEND           |

+ *  |---------------------------------------------------------------|

+ *  |   r+ or rb+ or r+b        O_RDWR                              |

+ *  |---------------------------------------------------------------|

+ *  |   w+ or wb+ or w+b        O_RDWR|O_CREAT|O_TRUNC              |

+ *  |---------------------------------------------------------------|

+ *  |   a+ or ab+ or a+b        O_RDWR|O_CREAT|O_APPEND             |

+ *  |---------------------------------------------------------------|

+ *

+ * @return  Standard error code.*/

+int ext4_fopen(ext4_file *file, const char *path, const char *flags);

+/**@brief   Alternate file open function.

+ *

+ * @param   file  File handle.

+ * @param   path  File path, has to start from mount point:/my_partition/file.

+ * @param   flags File open flags.

+ *

+ * @return  Standard error code.*/

+int ext4_fopen2(ext4_file *file, const char *path, int flags);

+/**@brief   File close function.

+ *

+ * @param   file File handle.

+ *

+ * @return  Standard error code.*/

+int ext4_fclose(ext4_file *file);

+/**@brief   File truncate function.

+ *

+ * @param   file File handle.

+ * @param   size New file size.

+ *

+ * @return  Standard error code.*/

+int ext4_ftruncate(ext4_file *file, u64int size);

+/**@brief   Read data from file.

+ *

+ * @param   file File handle.

+ * @param   buf  Output buffer.

+ * @param   size Bytes to read.

+ * @param   rcnt Bytes read (nil allowed).

+ *

+ * @return  Standard error code.*/

+int ext4_fread(ext4_file *file, void *buf, usize size, usize *rcnt);

+/**@brief   Write data to file.

+ *

+ * @param   file File handle.

+ * @param   buf  Data to write

+ * @param   size Number of bytes to write.

+ * @param   wcnt Bytes written (nil allowed).

+ *

+ * @return  Standard error code.*/

+int ext4_fwrite(ext4_file *file, const void *buf, usize size, usize *wcnt);

+/**@brief   File seek operation.

+ *

+ * @param   file File handle.

+ * @param   offset Offset to seek.

+ * @param   origin Seek type:

+ *              @ref SEEK_SET

+ *              @ref SEEK_CUR

+ *              @ref SEEK_END

+ *

+ * @return  Standard error code.*/

+int ext4_fseek(ext4_file *file, s64int offset, u32int origin);

+/**@brief   Get file position.

+ *

+ * @param   file File handle.

+ *

+ * @return  Actual file position */

+u64int ext4_ftell(ext4_file *file);

+/**@brief   Get file size.

+ *

+ * @param   file File handle.

+ *

+ * @return  File size. */

+u64int ext4_fsize(ext4_file *file);

+/**@brief Get inode of file/directory/link.

+ *

+ * @param path    Path to file/dir/link.

+ * @param ret_ino Inode number.

+ * @param inode   Inode internals.

+ *

+ * @return  Standard error code.*/

+int ext4_raw_inode_fill(const char *path, u32int *ret_ino,

+			struct ext4_inode *inode);

+/**@brief Check if inode exists.

+ *

+ * @param path    Path to file/dir/link.

+ * @param type    Inode type.

+ *                @ref EXT4_DE_UNKNOWN

+ *                @ref EXT4_DE_REG_FILE

+ *                @ref EXT4_DE_DIR

+ *                @ref EXT4_DE_CHRDEV

+ *                @ref EXT4_DE_BLKDEV

+ *                @ref EXT4_DE_FIFO

+ *                @ref EXT4_DE_SOCK

+ *                @ref EXT4_DE_SYMLINK

+ *

+ * @return  Standard error code.*/

+int ext4_inode_exist(const char *path, int type);

+/**@brief Change file/directory/link mode bits.

+ *

+ * @param path Path to file/dir/link.

+ * @param mode New mode bits (for example 0777).

+ *

+ * @return  Standard error code.*/

+int ext4_mode_set(const char *path, u32int mode);

+/**@brief Get file/directory/link mode bits.

+ *

+ * @param path Path to file/dir/link.

+ * @param mode Receives the current mode bits (for example 0777).

+ *

+ * @return  Standard error code.*/

+int ext4_mode_get(const char *path, u32int *mode);

+/**@brief Change file owner and group.

+ *

+ * @param path Path to file/dir/link.

+ * @param uid  User id.

+ * @param gid  Group id.

+ *

+ * @return  Standard error code.*/

+int ext4_owner_set(const char *path, u32int uid, u32int gid);

+/**@brief Get file/directory/link owner and group.

+ *

+ * @param path Path to file/dir/link.

+ * @param uid  User id.

+ * @param gid  Group id.

+ *

+ * @return  Standard error code.*/

+int ext4_owner_get(const char *path, u32int *uid, u32int *gid);

+/**@brief Set file/directory/link access time.

+ *

+ * @param path  Path to file/dir/link.

+ * @param atime Access timestamp.

+ *

+ * @return  Standard error code.*/

+int ext4_atime_set(const char *path, u32int atime);

+/**@brief Set file/directory/link modify time.

+ *

+ * @param path  Path to file/dir/link.

+ * @param mtime Modify timestamp.

+ *

+ * @return  Standard error code.*/

+int ext4_mtime_set(const char *path, u32int mtime);

+/**@brief Set file/directory/link change time.

+ *

+ * @param path  Path to file/dir/link.

+ * @param ctime Change timestamp.

+ *

+ * @return  Standard error code.*/

+int ext4_ctime_set(const char *path, u32int ctime);

+/**@brief Get file/directory/link access time.

+ *

+ * @param path  Path to file/dir/link.

+ * @param atime Access timestamp.

+ *

+ * @return  Standard error code.*/

+int ext4_atime_get(const char *path, u32int *atime);

+/**@brief Get file/directory/link modify time.

+ *

+ * @param path  Path to file/dir/link.

+ * @param mtime Modify timestamp.

+ *

+ * @return  Standard error code.*/

+int ext4_mtime_get(const char *path, u32int *mtime);

+/**@brief Get file/directory/link change time.

+ *

+ * @param path  Path to file/dir/link.

+ * @param ctime Change timestamp.

+ *

+ * @return  standard error code*/

+int ext4_ctime_get(const char *path, u32int *ctime);

+/**@brief Create symbolic link.

+ *

+ * @param target Destination entry path.

+ * @param path   Source entry path.

+ *

+ * @return  Standard error code.*/

+int ext4_fsymlink(const char *target, const char *path);

+/**@brief Create special file.

+ * @param path     Path to new special file.

+ * @param filetype Filetype of the new special file.

+ * 	           (that must not be regular file, directory, or unknown type)

+ * @param dev      If filetype is char device or block device,

+ * 	           the device number will become the payload in the inode.

+ * @return  Standard error code.*/

+int ext4_mknod(const char *path, int filetype, u32int dev);

+/**@brief Read symbolic link payload.

+ *

+ * @param path    Path to symlink.

+ * @param buf     Output buffer.

+ * @param bufsize Output buffer max size.

+ * @param rcnt    Bytes read.

+ *

+ * @return  Standard error code.*/

+int ext4_readlink(const char *path, char *buf, usize bufsize, usize *rcnt);

+/*********************************DIRECTORY OPERATION***********************/

+/**@brief   Recursive directory remove.

+ *

+ * @param   path Directory path to remove

+ *

+ * @return  Standard error code.*/

+int ext4_dir_rm(const char *path);

+/**@brief Rename/move directory.

+ *

+ * @param path     Source path.

+ * @param new_path Destination path.

+ *

+ * @return  Standard error code. */

+int ext4_dir_mv(const char *path, const char *new_path);

+/**@brief   Create new directory.

+ *

+ * @param   path Directory name.

+ *

+ * @return  Standard error code.*/

+int ext4_dir_mk(const char *path);

+/**@brief   Directory open.

+ *

+ * @param   dir  Directory handle.

+ * @param   path Directory path.

+ *

+ * @return  Standard error code.*/

+int ext4_dir_open(ext4_dir *dir, const char *path);

+/**@brief   Directory close.

+ *

+ * @param   dir directory handle.

+ *

+ * @return  Standard error code.*/

+int ext4_dir_close(ext4_dir *dir);

+/**@brief   Return next directory entry.

+ *

+ * @param   dir Directory handle.

+ *

+ * @return  Directory entry id (nil if no entry)*/

+const ext4_direntry *ext4_dir_entry_next(ext4_dir *dir);

+/**@brief   Rewind directory entry offset.

+ *

+ * @param   dir Directory handle.*/

+void ext4_dir_entry_rewind(ext4_dir *dir);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_balloc.h

@@ -1,0 +1,62 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_fs.h"

+/**@brief Compute number of block group from block address.

+ * @param s superblock pointer.

+ * @param baddr Absolute address of block.

+ * @return Block group index

+ */

+u32int ext4_balloc_get_bgid_of_block(struct ext4_sblock *s,

+				       ext4_fsblk_t baddr);

+/**@brief Compute the starting block address of a block group

+ * @param s    superblock pointer.

+ * @param bgid block group index

+ * @return Block address

+ */

+ext4_fsblk_t ext4_balloc_get_block_of_bgid(struct ext4_sblock *s,

+					   u32int bgid);

+/**@brief Calculate and set checksum of block bitmap.

+ * @param sb superblock pointer.

+ * @param bg block group

+ * @param bitmap bitmap buffer

+ */

+void ext4_balloc_set_bitmap_csum(struct ext4_sblock *sb,

+				 struct ext4_bgroup *bg,

+				 void *bitmap);

+/**@brief   Free block from inode.

+ * @param   inode_ref inode reference

+ * @param   baddr block address

+ * @return  standard error code*/

+int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref,

+			   ext4_fsblk_t baddr);

+/**@brief   Free blocks from inode.

+ * @param   inode_ref inode reference

+ * @param   first block address

+ * @param   count block count

+ * @return  standard error code*/

+int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,

+			    ext4_fsblk_t first, u32int count);

+/**@brief   Allocate block procedure.

+ * @param   inode_ref inode reference

+ * @param   goal

+ * @param   baddr allocated block address

+ * @return  standard error code*/

+int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref,

+			    ext4_fsblk_t goal,

+			    ext4_fsblk_t *baddr);

+/**@brief   Try allocate selected block.

+ * @param   inode_ref inode reference

+ * @param   baddr block address to allocate

+ * @param   free if baddr is not allocated

+ * @return  standard error code*/

+int ext4_balloc_try_alloc_block(struct ext4_inode_ref *inode_ref,

+				ext4_fsblk_t baddr, bool *free);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_bcache.h

@@ -1,0 +1,240 @@

+#pragma once

+#include "tree.h"

+#include "queue.h"

+#define EXT4_BLOCK_ZERO() 	\

+	{0}

+/**@brief   Single block descriptor*/
+struct ext4_buf {
+	/**@brief   Flags (bit positions are given by enum bcache_state_bits)*/
+	int flags;
+	/**@brief   Logical block address*/
+	u64int lba;
+	/**@brief   Data buffer.*/
+	u8int *data;
+	/**@brief   LRU priority. (unused) */
+	u32int lru_prio;
+	/**@brief   LRU id.*/
+	u32int lru_id;
+	/**@brief   Reference counter*/
+	u32int refctr;
+	/**@brief   The block cache this buffer belongs to. */
+	struct ext4_bcache *bc;
+	/**@brief   Whether or not buffer is on dirty list.*/
+	bool on_dirty_list;
+	/**@brief   LBA tree node*/
+	RB_ENTRY(ext4_buf) lba_node;
+	/**@brief   LRU tree node*/
+	RB_ENTRY(ext4_buf) lru_node;
+	/**@brief   Dirty list node*/
+	SLIST_ENTRY(ext4_buf) dirty_node;
+	/**@brief   Callback routine after a disk-write operation.
+	 * @param   bc block cache descriptor
+	 * @param   buf buffer descriptor
+	 * @param   res standard error code returned by bdev->bwrite()
+	 * @param   arg argument passed to this routine*/
+	void (*end_write)(struct ext4_bcache *bc,
+			  struct ext4_buf *buf,
+			  int res,
+			  void *arg);
+	/**@brief   argument passed to end_write() callback.*/
+	void *end_write_arg;
+};

+/**@brief   Single block descriptor*/

+struct ext4_block {

+	/**@brief   Logical block ID*/

+	u64int lb_id;

+	/**@brief   Buffer */

+	struct ext4_buf *buf;

+	/**@brief   Data buffer.*/

+	u8int *data;

+};

+/**@brief   Block cache descriptor*/
+struct ext4_bcache {
+	/**@brief   Item count in block cache*/
+	u32int cnt;
+	/**@brief   Item size in block cache*/
+	u32int itemsize;
+	/**@brief   Least recently used (LRU) counter*/
+	u32int lru_ctr;
+	/**@brief   Currently referenced datablocks*/
+	u32int ref_blocks;
+	/**@brief   Maximum referenced datablocks*/
+	u32int max_ref_blocks;
+	/**@brief   The blockdev bound to this block cache*/
+	struct ext4_blockdev *bdev;
+	/**@brief   The cache should not be shaken */
+	bool dont_shake;
+	/**@brief   A tree holding all bufs*/
+	RB_HEAD(ext4_buf_lba, ext4_buf) lba_root;
+	/**@brief   A tree holding unreferenced bufs*/
+	RB_HEAD(ext4_buf_lru, ext4_buf) lru_root;
+	/**@brief   A singly-linked list holding dirty buffers*/
+	SLIST_HEAD(ext4_buf_dirty, ext4_buf) dirty_list;
+};

+/**@brief buffer state bits
+ *
+ *  Each enumerator is a bit position, not a mask: the
+ *  ext4_bcache_*_flag() macros shift 1 left by the enumerator value.
+ *
+ *  - BC_UPTODATE: Buffer contains valid data.
+ *  - BC_DIRTY: Buffer is dirty.
+ *  - BC_FLUSH: Buffer will be immediately flushed,
+ *              when no one references it.
+ *  - BC_TMP: Buffer will be dropped once its refctr
+ *            reaches zero.
+ */
+enum bcache_state_bits {
+	BC_UPTODATE,
+	BC_DIRTY,
+	BC_FLUSH,
+	BC_TMP
+};

+/*
+ * Flag helpers for ext4_buf.flags.  b is a bit position (see enum
+ * bcache_state_bits), not a mask.  Every replacement list is fully
+ * parenthesized so the macros stay correct inside larger expressions.
+ */
+/**@brief   Set state bit b of buf.*/
+#define ext4_bcache_set_flag(buf, b)    \
+	((buf)->flags |= 1 << (b))
+/**@brief   Clear state bit b of buf.*/
+#define ext4_bcache_clear_flag(buf, b)    \
+	((buf)->flags &= ~(1 << (b)))
+/**@brief   Test state bit b of buf; evaluates to 1 if set, 0 otherwise.*/
+#define ext4_bcache_test_flag(buf, b)    \
+	(((buf)->flags & (1 << (b))) >> (b))

+/**@brief   Mark a buffer dirty.
+ *          Sets both the BC_UPTODATE and the BC_DIRTY state bits.
+ * @param   buf buffer descriptor */
+static inline void ext4_bcache_set_dirty(struct ext4_buf *buf) {
+	int mask = (1 << BC_UPTODATE) | (1 << BC_DIRTY);
+	buf->flags |= mask;
+}

+/**@brief   Clear a buffer's dirty state.
+ *          Clears BC_DIRTY and also BC_UPTODATE, so afterwards the
+ *          buffer is no longer flagged as containing valid data.
+ * @param   buf buffer descriptor */
+static inline void ext4_bcache_clear_dirty(struct ext4_buf *buf) {
+	int mask = (1 << BC_UPTODATE) | (1 << BC_DIRTY);
+	buf->flags &= ~mask;
+}

+/**@brief   Increment reference counter of buf by 1.*/

+#define ext4_bcache_inc_ref(buf) ((buf)->refctr++)

+/**@brief   Decrement reference counter of buf by 1.*/

+#define ext4_bcache_dec_ref(buf) ((buf)->refctr--)

+/**@brief   Put a buffer on the block cache's dirty list.
+ *          Does nothing when the buffer is already on the list.
+ * @param   bc block cache descriptor
+ * @param   buf buffer descriptor */
+static inline void
+ext4_bcache_insert_dirty_node(struct ext4_bcache *bc, struct ext4_buf *buf) {
+	if (buf->on_dirty_list)
+		return;
+	SLIST_INSERT_HEAD(&bc->dirty_list, buf, dirty_node);
+	buf->on_dirty_list = true;
+}

+/**@brief   Take a buffer off the block cache's dirty list.
+ *          Does nothing when the buffer is not on the list.
+ * @param   bc block cache descriptor
+ * @param   buf buffer descriptor */
+static inline void
+ext4_bcache_remove_dirty_node(struct ext4_bcache *bc, struct ext4_buf *buf) {
+	if (!buf->on_dirty_list)
+		return;
+	SLIST_REMOVE(&bc->dirty_list, buf, ext4_buf, dirty_node);
+	buf->on_dirty_list = false;
+}

+/**@brief   Dynamic initialization of block cache.

+ * @param   bc block cache descriptor

+ * @param   cnt items count in block cache

+ * @param   itemsize single item size (in bytes)

+ * @return  standard error code*/

+int ext4_bcache_init_dynamic(struct ext4_bcache *bc, u32int cnt,

+			     u32int itemsize);

+/**@brief   Do cleanup works on block cache.

+ * @param   bc block cache descriptor.*/

+void ext4_bcache_cleanup(struct ext4_bcache *bc);

+/**@brief   Dynamic de-initialization of block cache.

+ * @param   bc block cache descriptor

+ * @return  standard error code*/

+int ext4_bcache_fini_dynamic(struct ext4_bcache *bc);

+/**@brief   Get a buffer with the lowest LRU counter in bcache.

+ * @param   bc block cache descriptor

+ * @return  buffer with the lowest LRU counter*/

+struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc);

+/**@brief   Drop unreferenced buffer from bcache.

+ * @param   bc block cache descriptor

+ * @param   buf buffer*/

+void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf);

+/**@brief   Invalidate a buffer.

+ * @param   bc block cache descriptor

+ * @param   buf buffer*/

+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,

+				struct ext4_buf *buf);

+/**@brief   Invalidate a range of buffers.

+ * @param   bc block cache descriptor

+ * @param   from starting lba

+ * @param   cnt number of blocks to invalidate*/

+void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,

+				u64int from,

+				u32int cnt);

+/**@brief   Find existing buffer from block cache memory.

+ *          Unreferenced block allocation is based on LRU

+ *          (Least Recently Used) algorithm.

+ * @param   bc block cache descriptor

+ * @param   b block to alloc

+ * @param   lba logical block address

+ * @return  block cache buffer */

+struct ext4_buf *

+ext4_bcache_find_get(struct ext4_bcache *bc, struct ext4_block *b,

+		     u64int lba);

+/**@brief   Allocate block from block cache memory.

+ *          Unreferenced block allocation is based on LRU

+ *          (Least Recently Used) algorithm.

+ * @param   bc block cache descriptor

+ * @param   b block to alloc

+ * @param   is_new block is new (needs to be read)

+ * @return  standard error code*/

+int ext4_bcache_alloc(struct ext4_bcache *bc, struct ext4_block *b,

+		      bool *is_new);

+/**@brief   Free block from cache memory (decrement reference counter).

+ * @param   bc block cache descriptor

+ * @param   b block to free

+ * @return  standard error code*/

+int ext4_bcache_free(struct ext4_bcache *bc, struct ext4_block *b);

+/**@brief   Return a full status of block cache.

+ * @param   bc block cache descriptor

+ * @return  full status*/

+bool ext4_bcache_is_full(struct ext4_bcache *bc);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_bitmap.h

@@ -1,0 +1,49 @@

+#pragma once

+#include "ext4_config.h"

+/**@brief   Set one bit in a bitmap.
+ *          Bit n lives in byte n / 8, at position n % 8 counted from
+ *          the least significant bit.
+ * @param   bmap bitmap buffer
+ * @param   bit index of the bit to set*/
+static inline void ext4_bmap_bit_set(u8int *bmap, u32int bit)
+{
+	u8int *byte = &bmap[bit >> 3];
+	*byte |= (1 << (bit & 7));
+}

+/**@brief   Clear one bit in a bitmap.
+ *          Bit n lives in byte n / 8, at position n % 8 counted from
+ *          the least significant bit.
+ * @param   bmap bitmap buffer
+ * @param   bit index of the bit to clear*/
+static inline void ext4_bmap_bit_clr(u8int *bmap, u32int bit)
+{
+	u8int *byte = &bmap[bit >> 3];
+	*byte &= ~(1 << (bit & 7));
+}

+/**@brief   Check if the bitmap bit is set.
+ * @param   bmap bitmap buffer
+ * @param   bit bit to check
+ * @return  1 when the bit is set, 0 otherwise. The result is normalised
+ *          explicitly so it does not rely on bool being a type that
+ *          collapses non-zero values to 1.*/
+static inline bool ext4_bmap_is_bit_set(u8int *bmap, u32int bit)
+{
+	return (bmap[bit >> 3] & (1 << (bit & 7))) != 0;
+}

+/**@brief   Check if the bitmap bit is clear.
+ *          Logical negation of ext4_bmap_is_bit_set().
+ * @param   bmap bitmap buffer
+ * @param   bit bit to check
+ * @return  1 when the bit is clear, 0 when it is set*/
+static inline bool ext4_bmap_is_bit_clr(u8int *bmap, u32int bit)
+{
+	return ext4_bmap_is_bit_set(bmap, bit) == 0;
+}

+/**@brief   Free range of bits in bitmap.

+ * @param   bmap bitmap buffer

+ * @param   sbit start bit

+ * @param   bcnt bit count*/

+void ext4_bmap_bits_free(u8int *bmap, u32int sbit, u32int bcnt);

+/**@brief   Find first clear bit in bitmap.

+ * @param   bmap bitmap buffer
+ * @param   sbit start bit of search

+ * @param   ebit end bit of search

+ * @param   bit_id output parameter (first free bit)

+ * @return  standard error code*/

+int ext4_bmap_bit_find_clr(u8int *bmap, u32int sbit, u32int ebit,

+			   u32int *bit_id, bool *no_space);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_block_group.h

@@ -1,0 +1,271 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_super.h"

+/**@brief Read the address of the block holding the data block bitmap.
+ *        The high 32 bits are taken into account only when the
+ *        descriptor size reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg pointer to block group
+ * @param s pointer to superblock
+ * @return Address of block with block bitmap
+ */
+static inline u64int ext4_bg_get_block_bitmap(struct ext4_bgroup *bg,
+						struct ext4_sblock *s)
+{
+	u64int lo, hi;
+	lo = to_le32(bg->block_bitmap_lo);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	hi = to_le32(bg->block_bitmap_hi);
+	return lo | hi << 32;
+}

+/**@brief Store the address of the block holding the data block bitmap.
+ *        The high 32 bits are written only when the descriptor size
+ *        reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg pointer to block group
+ * @param s pointer to superblock
+ * @param blk block address to store
+ */
+static inline void ext4_bg_set_block_bitmap(struct ext4_bgroup *bg,
+					    struct ext4_sblock *s, u64int blk)
+{
+	bg->block_bitmap_lo = to_le32((u32int)blk);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->block_bitmap_hi = to_le32(blk >> 32);
+}

+/**@brief Read the address of the block holding the i-node bitmap.
+ *        The high 32 bits are taken into account only when the
+ *        descriptor size reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Address of block with i-node bitmap
+ */
+static inline u64int ext4_bg_get_inode_bitmap(struct ext4_bgroup *bg,
+						struct ext4_sblock *s)
+{
+	u64int lo, hi;
+	lo = to_le32(bg->inode_bitmap_lo);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	hi = to_le32(bg->inode_bitmap_hi);
+	return lo | hi << 32;
+}

+/**@brief Store the address of the block holding the i-node bitmap.
+ *        The high 32 bits are written only when the descriptor size
+ *        reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param blk block address to store
+ */
+static inline void ext4_bg_set_inode_bitmap(struct ext4_bgroup *bg,
+					    struct ext4_sblock *s, u64int blk)
+{
+	bg->inode_bitmap_lo = to_le32((u32int)blk);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->inode_bitmap_hi = to_le32(blk >> 32);
+}

+/**@brief Read the address of the first block of the i-node table.
+ *        The high 32 bits are taken into account only when the
+ *        descriptor size reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Address of first block of i-node table
+ */
+static inline u64int
+ext4_bg_get_inode_table_first_block(struct ext4_bgroup *bg,
+				    struct ext4_sblock *s)
+{
+	u64int lo, hi;
+	lo = to_le32(bg->inode_table_first_block_lo);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	hi = to_le32(bg->inode_table_first_block_hi);
+	return lo | hi << 32;
+}

+/**@brief Store the address of the first block of the i-node table.
+ *        The high 32 bits are written only when the descriptor size
+ *        reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param blk block address to store
+ */
+static inline void
+ext4_bg_set_inode_table_first_block(struct ext4_bgroup *bg,
+				    struct ext4_sblock *s, u64int blk)
+{
+	bg->inode_table_first_block_lo = to_le32((u32int)blk);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->inode_table_first_block_hi = to_le32(blk >> 32);
+}

+/**@brief Read the number of free blocks in a block group.
+ *        The high 16 bits are taken into account only when the
+ *        descriptor size reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Number of free blocks in block group
+ */
+static inline u32int ext4_bg_get_free_blocks_count(struct ext4_bgroup *bg,
+						     struct ext4_sblock *s)
+{
+	u32int lo, hi;
+	lo = to_le16(bg->free_blocks_count_lo);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	hi = to_le16(bg->free_blocks_count_hi);
+	return lo | hi << 16;
+}

+/**@brief Store the number of free blocks in a block group.
+ *        The high 16 bits are written only when the descriptor size
+ *        reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Number of free blocks in block group
+ */
+static inline void ext4_bg_set_free_blocks_count(struct ext4_bgroup *bg,
+						 struct ext4_sblock *s,
+						 u32int cnt)
+{
+	bg->free_blocks_count_lo = to_le16(cnt & 0xFFFF);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->free_blocks_count_hi = to_le16(cnt >> 16);
+}

+/**@brief Read the number of free i-nodes in a block group.
+ *        The high 16 bits are taken into account only when the
+ *        descriptor size reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Number of free i-nodes in block group
+ */
+static inline u32int ext4_bg_get_free_inodes_count(struct ext4_bgroup *bg,
+						     struct ext4_sblock *s)
+{
+	u32int lo, hi;
+	lo = to_le16(bg->free_inodes_count_lo);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	hi = to_le16(bg->free_inodes_count_hi);
+	return lo | hi << 16;
+}

+/**@brief Store the number of free i-nodes in a block group.
+ *        The high 16 bits are written only when the descriptor size
+ *        reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Number of free i-nodes in block group
+ */
+static inline void ext4_bg_set_free_inodes_count(struct ext4_bgroup *bg,
+						 struct ext4_sblock *s,
+						 u32int cnt)
+{
+	bg->free_inodes_count_lo = to_le16(cnt & 0xFFFF);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->free_inodes_count_hi = to_le16(cnt >> 16);
+}

+/**@brief Read the number of used directories in a block group.
+ *        The high 16 bits are taken into account only when the
+ *        descriptor size reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Number of used directories in block group
+ */
+static inline u32int ext4_bg_get_used_dirs_count(struct ext4_bgroup *bg,
+						   struct ext4_sblock *s)
+{
+	u32int lo, hi;
+	lo = to_le16(bg->used_dirs_count_lo);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	hi = to_le16(bg->used_dirs_count_hi);
+	return lo | hi << 16;
+}

+/**@brief Store the number of used directories in a block group.
+ *        The high 16 bits are written only when the descriptor size
+ *        reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Number of used directories in block group
+ */
+static inline void ext4_bg_set_used_dirs_count(struct ext4_bgroup *bg,
+					       struct ext4_sblock *s,
+					       u32int cnt)
+{
+	bg->used_dirs_count_lo = to_le16(cnt & 0xFFFF);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->used_dirs_count_hi = to_le16(cnt >> 16);
+}

+/**@brief Read the number of unused i-nodes.
+ *        The high 16 bits are taken into account only when the
+ *        descriptor size reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Number of unused i-nodes
+ */
+static inline u32int ext4_bg_get_itable_unused(struct ext4_bgroup *bg,
+						 struct ext4_sblock *s)
+{
+	u32int lo, hi;
+	lo = to_le16(bg->itable_unused_lo);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	hi = to_le16(bg->itable_unused_hi);
+	return lo | hi << 16;
+}

+/**@brief Store the number of unused i-nodes.
+ *        The high 16 bits are written only when the descriptor size
+ *        reported by the superblock is larger than
+ *        EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Number of unused i-nodes
+ */
+static inline void ext4_bg_set_itable_unused(struct ext4_bgroup *bg,
+					     struct ext4_sblock *s,
+					     u32int cnt)
+{
+	bg->itable_unused_lo = to_le16(cnt & 0xFFFF);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->itable_unused_hi = to_le16(cnt >> 16);
+}

+/**@brief Store the checksum of a block group.
+ * @param bg Pointer to block group
+ * @param crc Checksum of block group; passed through to_le16() before
+ *            being stored
+ */
+static inline void ext4_bg_set_checksum(struct ext4_bgroup *bg, u16int crc)
+{
+	u16int stored;
+	stored = to_le16(crc);
+	bg->checksum = stored;
+}

+/**@brief Check if block group has a flag.
+ * @param bg Pointer to block group
+ * @param f Flag mask to be checked
+ * @return 1 if any bit of f is set in the group's flags, 0 otherwise.
+ *         The result is normalised explicitly so that it cannot be
+ *         truncated or misread if bool is narrower than the mask.
+ */
+static inline bool ext4_bg_has_flag(struct ext4_bgroup *bg, u32int f)
+{
+	return (to_le16(bg->flags) & f) != 0;
+}

+/**@brief Turn on flag bits of a block group.
+ *        The flags field is converted with to_le16(), OR-ed with f
+ *        (truncated to 16 bits) and converted back before being stored.
+ * @param bg Pointer to block group
+ * @param f Flag mask to be set
+ */
+static inline void ext4_bg_set_flag(struct ext4_bgroup *bg, u32int f)
+{
+	u16int cur;
+	cur = to_le16(bg->flags);
+	cur = cur | f;
+	bg->flags = to_le16(cur);
+}

+/**@brief Turn off flag bits of a block group.
+ *        The flags field is converted with to_le16(), masked with ~f
+ *        (truncated to 16 bits) and converted back before being stored.
+ * @param bg Pointer to block group
+ * @param f Flag mask to be cleared
+ */
+static inline void ext4_bg_clear_flag(struct ext4_bgroup *bg, u32int f)
+{
+	u16int cur;
+	cur = to_le16(bg->flags);
+	cur = cur & ~f;
+	bg->flags = to_le16(cur);
+}

+/**@brief Calculate CRC16 of the block group.

+ * @param crc Init value

+ * @param buffer Input buffer

+ * @param len Sizeof input buffer

+ * @return Computed CRC16*/

+u16int ext4_bg_crc16(u16int crc, const u8int *buffer, usize len);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_blockdev.h

@@ -1,0 +1,215 @@

+#pragma once

+#include "ext4_bcache.h"

+/**@brief   Block device interface: driver callbacks plus the physical

+ *          geometry, scratch buffer and counters of the underlying device.*/

+struct ext4_blockdev_iface {

+	/**@brief   Open device function

+	 * @param   bdev block device.*/

+	int (*open)(struct ext4_blockdev *bdev);

+	/**@brief   Block read function.

+	 * @param   bdev block device

+	 * @param   buf output buffer

+	 * @param   blk_id block id

+	 * @param   blk_cnt block count*/

+	int (*bread)(struct ext4_blockdev *bdev, void *buf, u64int blk_id,

+		     u32int blk_cnt);

+	/**@brief   Block write function.

+	 * @param   bdev block device

+	 * @param   buf input buffer

+	 * @param   blk_id block id

+	 * @param   blk_cnt block count*/

+	int (*bwrite)(struct ext4_blockdev *bdev, const void *buf,

+		      u64int blk_id, u32int blk_cnt);

+	/**@brief   Close device function.

+	 * @param   bdev block device.*/

+	int (*close)(struct ext4_blockdev *bdev);

+	/**@brief   Lock block device. Required in multi partition mode

+	 *          operations. Not mandatory field.

+	 * @param   bdev block device.*/

+	int (*lock)(struct ext4_blockdev *bdev);

+	/**@brief   Unlock block device. Required in multi partition mode

+	 *          operations. Not mandatory field.

+	 * @param   bdev block device.*/

+	int (*unlock)(struct ext4_blockdev *bdev);

+	/**@brief   Block size (bytes): physical*/

+	u32int ph_bsize;

+	/**@brief   Block count: physical*/

+	u64int ph_bcnt;

+	/**@brief   Block size buffer: physical*/

+	u8int *ph_bbuf;

+	/**@brief   Reference counter to block device interface*/

+	u32int ph_refctr;

+	/**@brief   Physical read counter*/

+	u32int bread_ctr;

+	/**@brief   Physical write counter*/

+	u32int bwrite_ctr;

+	/**@brief   User data pointer*/

+	void* p_user;

+};

+/**@brief   Definition of the simple block device.*/

+struct ext4_blockdev {

+	/**@brief Block device interface*/

+	struct ext4_blockdev_iface *bdif;

+	/**@brief Offset in bdif. For multi partition mode.*/

+	u64int part_offset;

+	/**@brief Part size in bdif. For multi partition mode.*/

+	u64int part_size;

+	/**@brief   Block cache.*/

+	struct ext4_bcache *bc;

+	/**@brief   Block size (bytes) logical*/

+	u32int lg_bsize;

+	/**@brief   Block count: logical*/

+	u64int lg_bcnt;

+	/**@brief   Cache write back mode reference counter*/

+	u32int cache_write_back;

+	/**@brief   The filesystem this block device belongs to. */

+	struct ext4_fs *fs;

+	/**@brief   Opaque journal pointer.

+	 *          NOTE(review): presumably the journal bound to this

+	 *          device; its concrete type is not visible here - confirm.*/

+	void *journal;

+};

+#pragma incomplete struct ext4_blockdev

+/**@brief   Static initialization of the block device.

+ *          Defines a physical block buffer (__name_ph_bbuf), an interface

+ *          (__name_iface) and a block device (__name) that starts at

+ *          offset 0 and spans __bcnt * __bsize bytes. The product is

+ *          computed in 64 bits because part_size is a u64int while the

+ *          arguments are usually plain int constants.*/

+#define EXT4_BLOCKDEV_STATIC_INSTANCE(__name, __bsize, __bcnt, __open, __bread,\

+				      __bwrite, __close, __lock, __unlock)     \

+	static u8int __name##_ph_bbuf[(__bsize)];                            \

+	static struct ext4_blockdev_iface __name##_iface = {                   \

+		.open = __open,                                                \

+		.bread = __bread,                                              \

+		.bwrite = __bwrite,                                            \

+		.close = __close,                                              \

+		.lock = __lock,                                                \

+		.unlock = __unlock,                                            \

+		.ph_bsize = (__bsize),                                         \

+		.ph_bcnt = (__bcnt),                                           \

+		.ph_bbuf = __name##_ph_bbuf,                                   \

+	};								       \

+	static struct ext4_blockdev __name = {                                 \

+		.bdif = &__name##_iface,                                       \

+		.part_offset = 0,                                              \

+		.part_size = (u64int)(__bcnt) * (__bsize),                     \

+	}

+/**@brief   Block device initialization.

+ * @param   bdev block device descriptor

+ * @return  standard error code*/

+int ext4_block_init(struct ext4_blockdev *bdev);

+/**@brief   Binds a bcache to block device.

+ * @param   bdev block device descriptor

+ * @param   bc block cache descriptor

+ * @return  standard error code*/

+int ext4_block_bind_bcache(struct ext4_blockdev *bdev, struct ext4_bcache *bc);

+/**@brief   Close block device

+ * @param   bdev block device descriptor

+ * @return  standard error code*/

+int ext4_block_fini(struct ext4_blockdev *bdev);

+/**@brief   Flush data in given buffer to disk.

+ * @param   bdev block device descriptor

+ * @param   buf buffer

+ * @return  standard error code*/

+int ext4_block_flush_buf(struct ext4_blockdev *bdev, struct ext4_buf *buf);

+/**@brief   Flush data in buffer of given lba to disk,

+ *          if that buffer exists in block cache.

+ * @param   bdev block device descriptor

+ * @param   lba logical block address

+ * @return  standard error code*/

+int ext4_block_flush_lba(struct ext4_blockdev *bdev, u64int lba);

+/**@brief   Set logical block size in block device.

+ * @param   bdev block device descriptor

+ * @param   lb_bsize logical block size (in bytes)

+ * @return  nothing (the function is declared void)*/

+void ext4_block_set_lb_size(struct ext4_blockdev *bdev, u32int lb_bsize);

+/**@brief   Block get function (through cache, don't read).

+ * @param   bdev block device descriptor

+ * @param   b block descriptor

+ * @param   lba logical block address

+ * @return  standard error code*/

+int ext4_block_get_noread(struct ext4_blockdev *bdev, struct ext4_block *b,

+			  u64int lba);

+/**@brief   Block get function (through cache).

+ * @param   bdev block device descriptor

+ * @param   b block descriptor

+ * @param   lba logical block address

+ * @return  standard error code*/

+int ext4_block_get(struct ext4_blockdev *bdev, struct ext4_block *b,

+		   u64int lba);

+/**@brief   Block set procedure (through cache).

+ * @param   bdev block device descriptor

+ * @param   b block descriptor

+ * @return  standard error code*/

+int ext4_block_set(struct ext4_blockdev *bdev, struct ext4_block *b);

+/**@brief   Block read procedure (without cache)

+ * @param   bdev block device descriptor

+ * @param   buf output buffer

+ * @param   lba logical block address

+ * @param   cnt block count

+ * @return  standard error code*/

+int ext4_blocks_get_direct(struct ext4_blockdev *bdev, void *buf, u64int lba,

+			   u32int cnt);

+/**@brief   Block write procedure (without cache)

+ * @param   bdev block device descriptor

+ * @param   buf input buffer

+ * @param   lba logical block address

+ * @param   cnt block count

+ * @return  standard error code*/

+int ext4_blocks_set_direct(struct ext4_blockdev *bdev, const void *buf,

+			   u64int lba, u32int cnt);

+/**@brief   Write to block device (by direct address).

+ * @param   bdev block device descriptor

+ * @param   off byte offset in block device

+ * @param   buf input buffer

+ * @param   len length of the write buffer

+ * @return  standard error code*/

+int ext4_block_writebytes(struct ext4_blockdev *bdev, u64int off,

+			  const void *buf, u32int len);

+/**@brief   Read from block device (by direct address).

+ * @param   bdev block device descriptor

+ * @param   off byte offset in block device

+ * @param   buf output buffer

+ * @param   len length of the read buffer

+ * @return  standard error code*/

+int ext4_block_readbytes(struct ext4_blockdev *bdev, u64int off, void *buf,

+			 u32int len);

+/**@brief   Flush all dirty buffers to disk

+ * @param   bdev block device descriptor

+ * @return  standard error code*/

+int ext4_block_cache_flush(struct ext4_blockdev *bdev);

+/**@brief   Enable/disable write back cache mode

+ * @param   bdev block device descriptor

+ * @param   on_off

+ *              !0 - ENABLE

+ *               0 - DISABLE (all delayed cache buffers will be flushed)

+ * @return  standard error code*/

+int ext4_block_cache_write_back(struct ext4_blockdev *bdev, u8int on_off);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_config.h

@@ -1,0 +1,40 @@

+#pragma once

+#include <u.h>

+#include <libc.h>

+typedef enum { false, true } bool;

+/* File open flags, written as octal constants.

+ * NOTE(review): the values look like the Linux O_* constants - confirm

+ * before relying on that correspondence. */

+enum {

+	O_RDONLY = 00,

+	O_WRONLY = 01,

+	O_RDWR = 02,

+	O_CREAT = 0100,

+	O_EXCL = 0200,

+	O_TRUNC = 01000,

+	O_APPEND = 02000,

+};

+#if defined(__mips__) || defined(__power__) || defined(__power64__) || defined(__sparc__) || defined(__sparc64__)

+#define CONFIG_BIG_ENDIAN

+#endif

+#define CONFIG_EXT4_MAX_BLOCKDEV_NAME 128

+#define CONFIG_EXT4_MAX_MP_NAME 128

+#define CONFIG_EXT4_BLOCKDEVS_COUNT 32

+#define CONFIG_EXT4_MOUNTPOINTS_COUNT 32

+#define CONFIG_BLOCK_DEV_CACHE_SIZE 1024

+/* Maximum single truncate size. Transactions must be limited to reduce

+ * number of allocations for single transaction

+ */

+#define CONFIG_MAX_TRUNCATE_SIZE (16ul * 1024ul * 1024ul)

+extern char Eexists[];

+extern char Einval[];

+extern char Eio[];

+extern char Enomem[];

+extern char Enospc[];

+extern char Enotfound[];

+extern char Eperm[];

+extern char Erdonlyfs[];

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_crc32.h

@@ -1,0 +1,18 @@

+/* Based on FreeBSD. */

+#pragma once

+#include "ext4_config.h"

+/**@brief	CRC32 algorithm.

+ * @param	crc input feed

+ * @param 	buf input buffer

+ * @param	size input buffer length (bytes)

+ * @return	updated crc32 value*/

+u32int ext4_crc32(u32int crc, const void *buf, u32int size);

+/**@brief	CRC32C algorithm.

+ * @param	crc input feed

+ * @param 	buf input buffer

+ * @param	size input buffer length (bytes)

+ * @return	updated crc32c value*/

+u32int ext4_crc32c(u32int crc, const void *buf, u32int size);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_debug.h

@@ -1,0 +1,93 @@

+#pragma once

+#include "ext4_config.h"

+#define DEBUG_BALLOC (1ul << 0)

+#define DEBUG_BCACHE (1ul << 1)

+#define DEBUG_BITMAP (1ul << 2)

+#define DEBUG_BLOCK_GROUP (1ul << 3)

+#define DEBUG_BLOCKDEV (1ul << 4)

+#define DEBUG_DIR_IDX (1ul << 5)

+#define DEBUG_DIR (1ul << 6)

+#define DEBUG_EXTENT (1ul << 7)

+#define DEBUG_FS (1ul << 8)

+#define DEBUG_HASH (1ul << 9)

+#define DEBUG_IALLOC (1ul << 10)

+#define DEBUG_INODE (1ul << 11)

+#define DEBUG_SUPER (1ul << 12)

+#define DEBUG_XATTR (1ul << 13)

+#define DEBUG_MKFS (1ul << 14)

+#define DEBUG_EXT4 (1ul << 15)

+#define DEBUG_JBD (1ul << 16)

+#define DEBUG_MBR (1ul << 17)

+#define DEBUG_NOPREFIX (1ul << 31)

+#define DEBUG_ALL (0xFFFFFFFF)

+/**@brief   Map a single debug mask bit to its message prefix.

+ * @param   m debug mask; must be exactly one of the DEBUG_* module bits

+ * @return  prefix string for that module, or "" when m is not one of the

+ *          handled single-bit values*/

+static inline const char *ext4_dmask_id2str(u32int m)

+{

+	switch(m) {

+	case DEBUG_BALLOC:

+		return "ext4_balloc: ";

+	case DEBUG_BCACHE:

+		return "ext4_bcache: ";

+	case DEBUG_BITMAP:

+		return "ext4_bitmap: ";

+	case DEBUG_BLOCK_GROUP:

+		return "ext4_block_group: ";

+	case DEBUG_BLOCKDEV:

+		return "ext4_blockdev: ";

+	case DEBUG_DIR_IDX:

+		return "ext4_dir_idx: ";

+	case DEBUG_DIR:

+		return "ext4_dir: ";

+	case DEBUG_EXTENT:

+		return "ext4_extent: ";

+	case DEBUG_FS:

+		return "ext4_fs: ";

+	case DEBUG_HASH:

+		return "ext4_hash: ";

+	case DEBUG_IALLOC:

+		return "ext4_ialloc: ";

+	case DEBUG_INODE:

+		return "ext4_inode: ";

+	case DEBUG_SUPER:

+		return "ext4_super: ";

+	/* DEBUG_XATTR is defined alongside the other bits and gets its own

+	 * prefix like every other module. */

+	case DEBUG_XATTR:

+		return "ext4_xattr: ";

+	case DEBUG_MKFS:

+		return "ext4_mkfs: ";

+	case DEBUG_JBD:

+		return "ext4_jbd: ";

+	case DEBUG_MBR:

+		return "ext4_mbr: ";

+	case DEBUG_EXT4:

+		return "ext4: ";

+	}

+	return "";

+}

+#define DBG_NONE  ""

+#define DBG_INFO  "[info]  "

+#define DBG_WARN  "[warn]  "

+#define DBG_ERROR "[error] "

+/**@brief   Global mask debug set.

+ * @param   m new debug mask.*/

+void ext4_dmask_set(u32int m);

+/**@brief   Global mask debug clear.

+ * @param   m new debug mask.*/

+void ext4_dmask_clr(u32int m);

+/**@brief   Global debug mask get.

+ * @return  debug mask*/

+u32int ext4_dmask_get(void);

+/**@brief   Debug printf.

+ *          Prints to file descriptor 2 when any bit of m is set in the

+ *          global debug mask. Unless m contains DEBUG_NOPREFIX, the message

+ *          is preceded by the calling function name and the module prefix

+ *          returned by ext4_dmask_id2str(m). The remaining arguments are a

+ *          fprint format string and its values.*/

+#define ext4_dbg(m, ...) \

+	do { \

+		if ((m) & ext4_dmask_get()) { \

+			if (!((m) & DEBUG_NOPREFIX)) { \

+				fprint(2, "%s: %s", __func__, ext4_dmask_id2str(m)); \

+			} \

+			fprint(2, __VA_ARGS__); \

+		} \

+	} while (0)

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_dir.h

@@ -1,0 +1,243 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_blockdev.h"

+#include "ext4_super.h"

+/* State of a directory iterator (see ext4_dir_iterator_init/next/fini).

+ * NOTE(review): the field roles below are inferred from the names and the

+ * iterator prototypes in this header - confirm against the implementation. */

+struct ext4_dir_iter {

+	/* directory i-node being iterated */

+	struct ext4_inode_ref *inode_ref;

+	/* presumably the block that holds the current entry */

+	struct ext4_block curr_blk;

+	/* presumably the current position within the directory */

+	u64int curr_off;

+	/* current directory entry */

+	struct ext4_dir_en *curr;

+};

+/* Result of a directory lookup (filled by ext4_dir_find_entry, released with

+ * ext4_dir_destroy_result). */

+struct ext4_dir_search_result {

+	/* NOTE(review): presumably the block containing dentry - confirm */

+	struct ext4_block block;

+	/* directory entry that was found */

+	struct ext4_dir_en *dentry;

+};

+/**@brief Read the i-node number stored in a directory entry.

+ * @param de Directory entry

+ * @return I-node number

+ */

+static inline u32int ext4_dir_en_get_inode(struct ext4_dir_en *de)

+{

+	u32int ino = to_le32(de->inode);

+

+	return ino;

+}

+/**@brief Store an i-node number in a directory entry.

+ * @param de Directory entry

+ * @param inode I-node number

+ */

+static inline void ext4_dir_en_set_inode(struct ext4_dir_en *de, u32int inode)

+{

+	u32int disk_ino = to_le32(inode);

+

+	de->inode = disk_ino;

+}

+/**@brief Store an i-node number in a dot entry of an HTree root.

+ * @param de Directory entry

+ * @param inode I-node number

+ */

+static inline void ext4_dx_dot_en_set_inode(struct ext4_dir_idx_dot_en *de,

+					    u32int inode)

+{

+	u32int disk_ino = to_le32(inode);

+

+	de->inode = disk_ino;

+}

+/**@brief Read the length of a directory entry.

+ * @param de Directory entry

+ * @return Entry length

+ */

+static inline u16int

+ext4_dir_en_get_entry_len(struct ext4_dir_en *de)

+{

+	u16int elen = to_le16(de->entry_len);

+

+	return elen;

+}

+/**@brief Store the length of a directory entry.

+ * @param de Directory entry

+ * @param l  Entry length

+ */

+static inline void

+ext4_dir_en_set_entry_len(struct ext4_dir_en *de, u16int l)

+{

+	u16int disk_len = to_le16(l);

+

+	de->entry_len = disk_len;

+}

+/**@brief Read the name length of a directory entry.

+ *        The high byte is taken into account only when the superblock

+ *        reports rev_level 0 and minor_rev_level below 5.

+ * @param sb Superblock

+ * @param de Directory entry

+ * @return Entry name length

+ */

+static inline u16int

+ext4_dir_en_get_name_len(struct ext4_sblock *sb, struct ext4_dir_en *de)

+{

+	u16int len = de->name_len;

+

+	if (ext4_get32(sb, rev_level) != 0)

+		return len;

+	if (ext4_get32(sb, minor_rev_level) >= 5)

+		return len;

+	len |= ((u16int)de->in.name_length_high) << 8;

+	return len;

+}

+/**@brief Store the name length of a directory entry.

+ *        The high byte is written only when the superblock reports

+ *        rev_level 0 and minor_rev_level below 5.

+ * @param sb  Superblock

+ * @param de  Directory entry

+ * @param len Entry name length

+ */

+static inline void

+ext4_dir_en_set_name_len(struct ext4_sblock *sb, struct ext4_dir_en *de,

+			 u16int len)

+{

+	de->name_len = (len << 8) >> 8;

+	if (ext4_get32(sb, rev_level) != 0)

+		return;

+	if (ext4_get32(sb, minor_rev_level) >= 5)

+		return;

+	de->in.name_length_high = len >> 8;

+}

+/**@brief Read the i-node type recorded in a directory entry.

+ *        EXT4_DE_UNKNOWN is reported unless rev_level is above 0 or

+ *        minor_rev_level is at least 5.

+ * @param sb Superblock

+ * @param de Directory entry

+ * @return I-node type (file, dir, etc.)

+ */

+static inline u8int

+ext4_dir_en_get_inode_type(struct ext4_sblock *sb, struct ext4_dir_en *de)

+{

+	u8int t = EXT4_DE_UNKNOWN;

+

+	if ((ext4_get32(sb, rev_level) > 0) ||

+	    (ext4_get32(sb, minor_rev_level) >= 5))

+		t = de->in.inode_type;

+	return t;

+}

+/**@brief Record the i-node type in a directory entry.

+ *        Nothing is written unless rev_level is above 0 or

+ *        minor_rev_level is at least 5.

+ * @param sb Superblock

+ * @param de Directory entry

+ * @param t  I-node type (file, dir, etc.)

+ */

+static inline void

+ext4_dir_en_set_inode_type(struct ext4_sblock *sb, struct ext4_dir_en *de,

+			   u8int t)

+{

+	if (ext4_get32(sb, rev_level) > 0) {

+		de->in.inode_type = t;

+		return;

+	}

+	if (ext4_get32(sb, minor_rev_level) >= 5)

+		de->in.inode_type = t;

+}

+/**@brief Verify checksum of a linear directory leaf block

+ * @param inode_ref Directory i-node

+ * @param dirent    Linear directory leaf block

+ * @return true means the block passed checksum verification

+ */

+bool ext4_dir_csum_verify(struct ext4_inode_ref *inode_ref,

+			  struct ext4_dir_en *dirent);

+/**@brief Initialize directory iterator.

+ * Set position to the first valid entry from the required position.

+ * @param it        Pointer to iterator to be initialized

+ * @param inode_ref Directory i-node

+ * @param pos       Position to start reading entries from

+ * @return Error code

+ */

+int ext4_dir_iterator_init(struct ext4_dir_iter *it,

+			   struct ext4_inode_ref *inode_ref, u64int pos);

+/**@brief Jump to the next valid entry

+ * @param it Initialized iterator

+ * @return Error code

+ */

+int ext4_dir_iterator_next(struct ext4_dir_iter *it);

+/**@brief Uninitialize directory iterator.

+ *        Release all allocated structures.

+ * @param it Iterator to be finished

+ * @return Error code

+ */

+int ext4_dir_iterator_fini(struct ext4_dir_iter *it);

+/**@brief Write directory entry to concrete data block.

+ * @param sb        Superblock

+ * @param en     Pointer to entry to be written

+ * @param entry_len Length of new entry

+ * @param child     Child i-node to be written to new entry

+ * @param name      Name of the new entry

+ * @param name_len  Length of entry name

+ */

+void ext4_dir_write_entry(struct ext4_sblock *sb, struct ext4_dir_en *en,

+			  u16int entry_len, struct ext4_inode_ref *child,

+			  const char *name, usize name_len);

+/**@brief Add new entry to the directory.

+ * @param parent Directory i-node

+ * @param name   Name of new entry

+ * @param name_len Length of the new entry name

+ * @param child  I-node to be referenced from new entry

+ * @return Error code

+ */

+int ext4_dir_add_entry(struct ext4_inode_ref *parent, const char *name,

+		       u32int name_len, struct ext4_inode_ref *child);

+/**@brief Find directory entry with passed name.

+ * @param result Result structure to be returned if entry found

+ * @param parent Directory i-node

+ * @param name   Name of entry to be found

+ * @param name_len  Name length

+ * @return Error code

+ */

+int ext4_dir_find_entry(struct ext4_dir_search_result *result,

+			struct ext4_inode_ref *parent, const char *name,

+			u32int name_len);

+/**@brief Remove directory entry.

+ * @param parent Directory i-node

+ * @param name   Name of the entry to be removed

+ * @param name_len  Name length

+ * @return Error code

+ */

+int ext4_dir_remove_entry(struct ext4_inode_ref *parent, const char *name,

+			  u32int name_len);

+/**@brief Try to insert entry to concrete data block.

+ * @param sb           Superblock

+ * @param inode_ref    Directory i-node

+ * @param dst_blk      Block to try to insert entry to

+ * @param child        Child i-node to be inserted by new entry

+ * @param name         Name of the new entry

+ * @param name_len     Length of the new entry name

+ * @return Error code

+ */

+int ext4_dir_try_insert_entry(struct ext4_sblock *sb,

+			      struct ext4_inode_ref *inode_ref,

+			      struct ext4_block *dst_blk,

+			      struct ext4_inode_ref *child, const char *name,

+			      u32int name_len);

+/**@brief Try to find entry in block by name.

+ * @param block     Block containing entries

+ * @param sb        Superblock

+ * @param name_len  Length of entry name

+ * @param name      Name of entry to be found

+ * @param res_entry Output pointer to found entry, nil if not found

+ * @return Error code

+ */

+int ext4_dir_find_in_block(struct ext4_block *block, struct ext4_sblock *sb,

+			   usize name_len, const char *name,

+			   struct ext4_dir_en **res_entry);

+/**@brief Simple function to release allocated data from result.

+ * @param parent Parent inode

+ * @param result Search result to destroy

+ * @return Error code

+ *

+ */

+int ext4_dir_destroy_result(struct ext4_inode_ref *parent,

+			    struct ext4_dir_search_result *result);

+void ext4_dir_set_csum(struct ext4_inode_ref *inode_ref,

+		       struct ext4_dir_en *dirent);

+void ext4_dir_init_entry_tail(struct ext4_dir_entry_tail *t);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_dir_idx.h

@@ -1,0 +1,52 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_fs.h"

+#include "ext4_dir.h"

+/* One block of a directory index together with pointers into its entries.

+ * NOTE(review): field roles are inferred from the names only - confirm

+ * against the directory index implementation. */

+struct ext4_dir_idx_block {

+	/* block descriptor */

+	struct ext4_block b;

+	/* presumably the first index entry in the block */

+	struct ext4_dir_idx_entry *entries;

+	/* presumably the entry currently selected in the block */

+	struct ext4_dir_idx_entry *position;

+};

+#define EXT4_DIR_DX_INIT_BCNT 2

+/**@brief Initialize index structure of new directory.

+ * @param dir Pointer to directory i-node

+ * @param parent Pointer to parent directory i-node

+ * @return Error code

+ */

+int ext4_dir_dx_init(struct ext4_inode_ref *dir,

+		     struct ext4_inode_ref *parent);

+/**@brief Try to find directory entry using directory index.

+ * @param result    Output value - if the entry is found,

+ *                  it is passed back through this parameter

+ * @param inode_ref Directory i-node

+ * @param name_len  Length of name to be found

+ * @param name      Name to be found

+ * @return Error code

+ */

+int ext4_dir_dx_find_entry(struct ext4_dir_search_result *result,

+			   struct ext4_inode_ref *inode_ref, usize name_len,

+			   const char *name);

+/**@brief Add new entry to indexed directory

+ * @param parent Directory i-node

+ * @param child  I-node to be referenced from directory entry

+ * @param name   Name of new directory entry

+ * @param name_len Length of the new entry name

+ * @return Error code

+ */

+int ext4_dir_dx_add_entry(struct ext4_inode_ref *parent,

+			  struct ext4_inode_ref *child, const char *name, u32int name_len);

+/**@brief Reset the parent i-node recorded in an indexed directory

+ * @param dir           Directory i-node

+ * @param parent_inode  parent inode index

+ * @return Error code

+ */

+int ext4_dir_dx_reset_parent_inode(struct ext4_inode_ref *dir,

+                                   u32int parent_inode);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_extent.h

@@ -1,0 +1,312 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+#include "ext4_inode.h"

+/*

+ * Array of ext4_ext_path contains path to some extent.

+ * Creation/lookup routines use it for traversal/splitting/etc.

+ * Truncate uses it to simulate recursive walking.

+ *

+ * NOTE(review): the per-field notes below are inferred from the names -

+ * confirm against the extent implementation.

+ */

+struct ext4_extent_path {

+	/* block descriptor of the node at this level */

+	struct ext4_block block;

+	/* depth of this node */

+	u16int depth;

+	/* extent header of the node */

+	struct ext4_extent_header *header;

+	/* index entry selected in the node (presumably for index nodes) */

+	struct ext4_extent_index *index;

+	/* extent selected in the node (presumably for leaf nodes) */

+	struct ext4_extent *extent;

+};

+/* Bit 15 of the extent length field marks an unwritten extent. */

+#define EXT4_EXT_UNWRITTEN_MASK (1L << 15)

+/* Largest length of a written extent; a stored length above this value

+ * is treated as unwritten (see EXT4_EXT_IS_UNWRITTEN). */

+#define EXT4_EXT_MAX_LEN_WRITTEN (1L << 15)

+/* Largest length of an unwritten extent. */

+#define EXT4_EXT_MAX_LEN_UNWRITTEN \

+	(EXT4_EXT_MAX_LEN_WRITTEN - 1)

+/* Raw length field of an extent, converted with to_le16(). */

+#define EXT4_EXT_GET_LEN(ex) to_le16((ex)->nblocks)

+/* Length of an extent with the unwritten bit masked off. */

+#define EXT4_EXT_GET_LEN_UNWRITTEN(ex) \

+	(EXT4_EXT_GET_LEN(ex) & ~(EXT4_EXT_UNWRITTEN_MASK))

+/* Store count as the raw length field of an extent. */

+#define EXT4_EXT_SET_LEN(ex, count) \

+	((ex)->nblocks = to_le16(count))

+/* True when the stored length exceeds EXT4_EXT_MAX_LEN_WRITTEN. */

+#define EXT4_EXT_IS_UNWRITTEN(ex) \

+	(EXT4_EXT_GET_LEN(ex) > EXT4_EXT_MAX_LEN_WRITTEN)

+/* Set the unwritten bit in the length field. */

+#define EXT4_EXT_SET_UNWRITTEN(ex) \

+	((ex)->nblocks |= to_le16(EXT4_EXT_UNWRITTEN_MASK))

+/* Clear the unwritten bit in the length field. */

+#define EXT4_EXT_SET_WRITTEN(ex) \

+	((ex)->nblocks &= ~(to_le16(EXT4_EXT_UNWRITTEN_MASK)))

+/* First extent stored right after the extent header. */

+#define EXT4_EXTENT_FIRST(header)                                              \

+	((struct ext4_extent *)(((char *)(header)) +                           \

+				sizeof(struct ext4_extent_header)))

+/* First index entry stored right after the extent header. */

+#define EXT4_EXTENT_FIRST_INDEX(header)                                        \

+	((struct ext4_extent_index *)(((char *)(header)) +                     \

+				      sizeof(struct ext4_extent_header)))

+/* Last valid extent of the node. nentries is converted with to_le16()

+ * the same way ext4_extent_header_get_nentries() reads it, so the macro

+ * also works when CONFIG_BIG_ENDIAN is defined. */

+#define EXT4_EXTENT_LAST(header)                                              \

+	(((struct ext4_extent *)(((char *)(header)) +                         \

+				 sizeof(struct ext4_extent_header))) +        \

+				 to_le16((header)->nentries) - 1)

+/* Last valid index entry of the node; see EXT4_EXTENT_LAST. */

+#define EXT4_EXTENT_LAST_INDEX(header)                                        \

+	(((struct ext4_extent_index *)(((char *)(header)) +                   \

+				       sizeof(struct ext4_extent_header))) +  \

+				       to_le16((header)->nentries) - 1)

+/* Size in bytes of one extent entry. */

+#define EXT4_EXTENT_SIZE sizeof(struct ext4_extent)

+/* Size in bytes of one extent index entry. */

+#define EXT4_EXTENT_INDEX_SIZE sizeof(struct ext4_extent_index)

+/* Byte offset just past the header and max_nentries extent slots. */

+#define EXT4_EXTENT_TAIL_OFFSET(hdr)                                           \

+	(sizeof(struct ext4_extent_header) +                                   \

+	 (sizeof(struct ext4_extent) * to_le16((hdr)->max_nentries)))

+/* True when iblock lies in [eiblock, eiblock + len - 1]. */

+#define EXT4_EXTENT_IN_RANGE(iblock, eiblock, len)	\

+	((iblock) >= (eiblock) && (iblock) <= (eiblock) + (len) - 1)

+/* All-ones 32-bit value. */

+#define EXT4_EXTENT_MAX_BLOCKS    ((u32int)(-1))

+/**@brief Read the logical number of the first block covered by an extent.

+ * @param extent Extent to load number from

+ * @return Logical number of the first block covered by extent */

+static inline u32int

+ext4_extent_get_iblock(struct ext4_extent *extent)

+{

+	u32int first = to_le32(extent->iblock);

+

+	return first;

+}

+/**@brief Store the logical number of the first block covered by an extent.

+ * @param extent Extent to set number to

+ * @param iblock Logical number of the first block covered by extent */

+static inline void

+ext4_extent_set_iblock(struct ext4_extent *extent, ext4_lblk_t iblock)

+{

+	u32int disk_iblock = to_le32(iblock);

+

+	extent->iblock = disk_iblock;

+}

+/**@brief Read the number of blocks covered by an extent.

+ *        For an unwritten extent the unwritten bit is masked off.

+ * @param extent Extent to load count from

+ * @return Number of blocks covered by extent */

+static inline u16int

+ext4_extent_get_nblocks(struct ext4_extent *extent)

+{

+	return EXT4_EXT_IS_UNWRITTEN(extent)

+		? EXT4_EXT_GET_LEN_UNWRITTEN(extent)

+		: EXT4_EXT_GET_LEN(extent);

+}

+/**@brief Store the number of blocks covered by an extent.

+ * @param extent Extent to store count to

+ * @param count  Number of blocks covered by extent

+ * @param unwritten Whether the extent is unwritten or not */

+static inline void

+ext4_extent_set_nblocks(struct ext4_extent *extent, u16int count,

+			bool unwritten)

+{

+	EXT4_EXT_SET_LEN(extent, count);

+	if (!unwritten)

+		return;

+	EXT4_EXT_SET_UNWRITTEN(extent);

+}

+/**@brief Read the physical number of the first block covered by an extent.

+ *        The value is assembled from the 16-bit high and 32-bit low parts.

+ * @param extent Extent to load number

+ * @return Physical number of the first block covered by extent */

+static inline u64int

+ext4_extent_get_fblock(struct ext4_extent *extent)

+{

+	u64int hi = to_le16(extent->fblock_hi);

+	u64int lo = to_le32(extent->fblock_lo);

+

+	return (hi << 32) | lo;

+}

+/**@brief Store the physical number of the first block covered by an extent.

+ *        The value is split into a 32-bit low and a 16-bit high part.

+ * @param extent Extent to store number to

+ * @param fblock Physical number of the first block covered by extent */

+static inline void

+ext4_extent_set_fblock(struct ext4_extent *extent, u64int fblock)

+{

+	u64int lo = fblock & 0xFFFFFFFFULL;

+	u16int hi = (u16int)(fblock >> 32);

+

+	extent->fblock_lo = to_le32(lo);

+	extent->fblock_hi = to_le16(hi);

+}

+/**@brief Read the logical number of the first block covered by an index.

+ * @param index Extent index to load number from

+ * @return Logical number of the first block covered by extent index */

+static inline u32int ext4_extent_index_get_iblock(struct ext4_extent_index *index)

+{

+	u32int first = to_le32(index->iblock);

+

+	return first;

+}

+/**@brief Store the logical number of the first block covered by an index.

+ * @param index  Extent index to set number to

+ * @param iblock Logical number of the first block covered by extent index */

+static inline void ext4_extent_index_set_iblock(struct ext4_extent_index *index,

+						u32int iblock)

+{

+	u32int disk_iblock = to_le32(iblock);

+

+	index->iblock = disk_iblock;

+}

+/**@brief Read the physical number of the block holding the child node.

+ *        The value is assembled from the 16-bit high and 32-bit low parts.

+ * @param index Extent index to load number from

+ * @return Physical number of the block with child node */

+static inline u64int ext4_extent_index_get_fblock(struct ext4_extent_index *index)

+{

+	u64int hi = to_le16(index->fblock_hi);

+	u64int lo = to_le32(index->fblock_lo);

+

+	return (hi << 32) | lo;

+}

+/**@brief Store the physical number of the block holding the child node.

+ *        The value is split into a 32-bit low and a 16-bit high part.

+ * @param index  Extent index to set number to

+ * @param fblock Physical number of the block with child node */

+static inline void

+ext4_extent_index_set_fblock(struct ext4_extent_index *index, u64int fblock)

+{

+	u64int lo = fblock & 0xFFFFFFFFULL;

+	u16int hi = (u16int)(fblock >> 32);

+

+	index->fblock_lo = to_le32(lo);

+	index->fblock_hi = to_le16(hi);

+}

+/**@brief Read the magic value of an extent header.

+ * @param header Extent header to load value from

+ * @return Magic value of extent header */

+static inline u16int ext4_extent_header_get_magic(struct ext4_extent_header *header)

+{

+	u16int magic = to_le16(header->magic);

+

+	return magic;

+}

+/**@brief Store the magic value of an extent header.

+ * @param header Extent header to set value to

+ * @param magic  Magic value of extent header */

+static inline void

+ext4_extent_header_set_magic(struct ext4_extent_header *header, u16int magic)

+{

+	u16int disk_magic = to_le16(magic);

+

+	header->magic = disk_magic;

+}

+/**@brief Read the number of entries recorded in an extent header.

+ * @param header Extent header to get value from

+ * @return Number of entries covered by extent header */

+static inline u16int ext4_extent_header_get_nentries(struct ext4_extent_header *header)

+{

+	u16int n = to_le16(header->nentries);

+

+	return n;

+}

+/**@brief Store the number of entries in an extent header.

+ * @param header Extent header to set value to

+ * @param count  Number of entries covered by extent header */

+static inline void ext4_extent_header_set_nentries(struct ext4_extent_header *header,

+						   u16int count)

+{

+	u16int disk_count = to_le16(count);

+

+	header->nentries = disk_count;

+}

+/**@brief Read the maximum number of entries recorded in an extent header.

+ * @param header Extent header to get value from

+ * @return Maximum number of entries covered by extent header */

+static inline u16int ext4_extent_header_get_max_nentries(struct ext4_extent_header *header)

+{

+	u16int limit = to_le16(header->max_nentries);

+

+	return limit;

+}

+/**@brief Store the maximum number of entries in an extent header.

+ * @param header    Extent header to set value to

+ * @param max_count Maximum number of entries covered by extent header */

+static inline void ext4_extent_header_set_max_nentries(struct ext4_extent_header *header,

+						       u16int max_count)

+{

+	u16int disk_limit = to_le16(max_count);

+

+	header->max_nentries = disk_limit;

+}

+/**@brief Read the depth of the extent subtree from its header.

+ * @param header Extent header to get value from

+ * @return Depth of extent subtree */

+static inline u16int ext4_extent_header_get_depth(struct ext4_extent_header *header)

+{

+	u16int levels = to_le16(header->depth);

+

+	return levels;

+}

+/**@brief Store the depth of the extent subtree in its header.

+ * @param header Extent header to set value to

+ * @param depth  Depth of extent subtree */

+static inline void ext4_extent_header_set_depth(struct ext4_extent_header *header,

+						u16int depth)

+{

+	u16int disk_depth = to_le16(depth);

+

+	header->depth = disk_depth;

+}

+/**@brief Read the generation recorded in an extent header.

+ * @param header Extent header to get value from

+ * @return Generation */

+static inline u32int ext4_extent_header_get_generation(struct ext4_extent_header *header)

+{

+	u32int gen = to_le32(header->generation);

+

+	return gen;

+}

+/**@brief Store the generation in an extent header.

+ * @param header     Extent header to set value to

+ * @param generation Generation */

+static inline void ext4_extent_header_set_generation(struct ext4_extent_header *header,

+						     u32int generation)

+{

+	u32int disk_gen = to_le32(generation);

+

+	header->generation = disk_gen;

+}

+/******************************************************************************/

+/**@brief Initialize an empty extent tree root inside an i-node.

+ *        Writes a depth-0 header with no entries, generation 0 and the

+ *        extent magic, then marks the i-node reference dirty.

+ * @param inode_ref I-node whose extent header is initialized */

+static inline void ext4_extent_tree_init(struct ext4_inode_ref *inode_ref)

+{

+	/* (EXT4_INODE_BLOCKS 32-bit slots minus the header) / extent size */

+	u16int capacity = (EXT4_INODE_BLOCKS * sizeof(u32int) -

+			   sizeof(struct ext4_extent_header)) /

+			  sizeof(struct ext4_extent);

+	struct ext4_extent_header *root =

+			ext4_inode_get_extent_header(inode_ref->inode);

+

+	ext4_extent_header_set_magic(root, EXT4_EXTENT_MAGIC);

+	ext4_extent_header_set_nentries(root, 0);

+	ext4_extent_header_set_max_nentries(root, capacity);

+	ext4_extent_header_set_depth(root, 0);

+	ext4_extent_header_set_generation(root, 0);

+	inode_ref->dirty = true;

+}

+/**@brief Extent-based blockmap manipulation

+ * @param inode_ref   I-node

+ * @param iblock      starting logical block of the inode

+ * @param max_nblocks maximum number of blocks to get from/allocate to blockmap

+ * @param resfblockp  return physical block address of the first block of an

+ * extent

+ * @param create      true if caller wants to insert mapping or convert

+ * unwritten mapping to written one

+ * @param resnblocksp return number of blocks in an extent (must be smaller than

+ * \p max_nblocks)

+ * @return Error code*/

+int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref,

+			   ext4_lblk_t iblock,

+			   ext4_lblk_t max_nblocks,

+			   ext4_fsblk_t *resfblockp,

+			   bool create,

+			   ext4_lblk_t *resnblocksp);

+/**@brief Release all data blocks starting from specified logical block.

+ * @param inode_ref   I-node to release blocks from

+ * @param from        First logical block to release

+ * @param to          Last logical block to release

+ *                    (TODO confirm whether the bound is inclusive)

+ * @return Error code */

+int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref,

+			     ext4_lblk_t from,

+			     ext4_lblk_t to);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_fs.h

@@ -1,0 +1,222 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+struct ext4_fs {	/* State of one filesystem instance; set up by ext4_fs_init() */

+	bool read_only;	/* read-only flag (see ext4_fs_init / ext4_fs_check_features) */

+	struct ext4_blockdev *bdev;	/* block device holding the filesystem */

+	struct ext4_sblock sb;	/* superblock, stored by value */

+	u64int inode_block_limits[4];	/* presumably per-level limits of the indirect block map -- TODO confirm */

+	u64int inode_blocks_per_level[4];	/* presumably blocks addressed per indirection level -- TODO confirm */

+	u32int last_inode_bg_id;	/* presumably block group of the last i-node allocation -- TODO confirm */

+	struct jbd_fs *jbd_fs;	/* journal filesystem handle */

+	struct jbd_journal *jbd_journal;	/* journal state */

+	struct jbd_trans *curr_trans;	/* journal transaction; name suggests the one currently open -- TODO confirm */

+};

+struct ext4_block_group_ref {	/* Reference filled by ext4_fs_get_block_group_ref() */

+	struct ext4_block block;	/* presumably the block holding the descriptor -- TODO confirm */

+	struct ext4_bgroup *block_group;	/* block group descriptor */

+	struct ext4_fs *fs;	/* filesystem the group belongs to */

+	u32int index;	/* block group index */

+	bool dirty;	/* dirty flag; NOTE(review): presumably checked when the reference is put back */

+};

+struct ext4_inode_ref {	/* Reference filled by ext4_fs_get_inode_ref() */

+	struct ext4_block block;	/* presumably the block holding the i-node -- TODO confirm */

+	struct ext4_inode *inode;	/* the i-node itself */

+	struct ext4_fs *fs;	/* filesystem the i-node belongs to */

+	u32int index;	/* i-node index */

+	bool dirty;	/* dirty flag; set by code that modifies the i-node */

+};

+#pragma incomplete struct ext4_fs

+/**@brief Convert an absolute block address to its index inside a block group.

+ * When the superblock has a non-zero first_data_block, a non-zero address

+ * is shifted down by one before the remainder is taken.

+ * @param s Superblock pointer

+ * @param baddr Block number to convert

+ * @return Relative number of block

+ */

+static inline u32int ext4_fs_addr_to_idx_bg(struct ext4_sblock *s,

+						     ext4_fsblk_t baddr)

+{

+	ext4_fsblk_t rel = baddr;

+	if (rel != 0 && ext4_get32(s, first_data_block) != 0)

+		rel -= 1;

+	return rel % ext4_get32(s, blocks_per_group);

+}

+/**@brief Convert relative block address in group to absolute address.

+ * The whole computation is done in 64 bits: the product of

+ * blocks_per_group and bgid can exceed 32 bits, and a 32-bit result

+ * would wrap around before being widened to ext4_fsblk_t.

+ * @param s Superblock pointer

+ * @param index Relative block address

+ * @param bgid Block group

+ * @return Absolute block address

+ */

+static inline ext4_fsblk_t ext4_fs_bg_idx_to_addr(struct ext4_sblock *s,

+						     u32int index,

+						     u32int bgid)

+{

+	ext4_fsblk_t rel = index;

+	/* Shift by one when the superblock has a non-zero first_data_block */

+	if (ext4_get32(s, first_data_block))

+		rel++;

+	return (ext4_fsblk_t)ext4_get32(s, blocks_per_group) * bgid + rel;

+}

+/**@brief Compute the address of the first block of a block group.

+ * The result is bgid * blocks_per_group + first_data_block, evaluated

+ * in 64 bits.

+ * @param s Superblock pointer

+ * @param bgid Block group

+ * @return Address of the first block of the group

+ */

+static inline ext4_fsblk_t ext4_fs_first_bg_block_no(struct ext4_sblock *s,

+						 u32int bgid)

+{

+	u64int group_start = (u64int)bgid * ext4_get32(s, blocks_per_group);

+	return group_start + ext4_get32(s, first_data_block);

+}

+/**@brief Initialize filesystem and read all needed data.

+ * @param fs Filesystem instance to be initialized

+ * @param bdev Identifier of the device with the filesystem

+ * @param read_only Mark the filesystem as read-only.

+ * @return Error code

+ */

+int ext4_fs_init(struct ext4_fs *fs, struct ext4_blockdev *bdev,

+		 bool read_only);

+/**@brief Destroy filesystem instance (used by unmount operation).

+ * @param fs Filesystem to be destroyed

+ * @return Error code

+ */

+int ext4_fs_fini(struct ext4_fs *fs);

+/**@brief Check filesystem's features, if supported by this driver

+ * The function can return 0 and still set the read_only flag. This means

+ * that the filesystem uses some unsupported features that could cause

+ * problems during some write operations.

+ * @param fs        Filesystem to be checked

+ * @param read_only Flag if filesystem should be mounted only for reading

+ * @return Error code

+ */

+int ext4_fs_check_features(struct ext4_fs *fs, bool *read_only);

+/**@brief Get reference to block group specified by index.

+ * @param fs   Filesystem to find block group on

+ * @param bgid Index of block group to load

+ * @param ref  Output pointer for reference

+ * @return Error code

+ */

+int ext4_fs_get_block_group_ref(struct ext4_fs *fs, u32int bgid,

+				struct ext4_block_group_ref *ref);

+/**@brief Put reference to block group.

+ * @param ref Pointer for reference to be put back

+ * @return Error code

+ */

+int ext4_fs_put_block_group_ref(struct ext4_block_group_ref *ref);

+/**@brief Get reference to i-node specified by index.

+ * @param fs    Filesystem to find i-node on

+ * @param index Index of i-node to load

+ * @param ref   Output pointer for reference

+ * @return Error code

+ */

+int ext4_fs_get_inode_ref(struct ext4_fs *fs, u32int index,

+			  struct ext4_inode_ref *ref);

+/**@brief Reset blocks field of i-node.

+ * @param fs        Filesystem to reset blocks field of i-node on

+ * @param inode_ref Reference to the i-node to be operated on

+ */

+void ext4_fs_inode_blocks_init(struct ext4_fs *fs,

+			       struct ext4_inode_ref *inode_ref);

+/**@brief Put reference to i-node.

+ * @param ref Pointer for reference to be put back

+ * @return Error code

+ */

+int ext4_fs_put_inode_ref(struct ext4_inode_ref *ref);

+/**@brief Convert filetype to inode mode.

+ * @param filetype

+ * @return inode mode

+ */

+u32int ext4_fs_correspond_inode_mode(int filetype);

+/**@brief Allocate new i-node in the filesystem.

+ * @param fs        Filesystem to allocate i-node on

+ * @param inode_ref Output pointer to return reference to allocated i-node

+ * @param filetype  File type of newly created i-node

+ * @return Error code

+ */

+int ext4_fs_alloc_inode(struct ext4_fs *fs, struct ext4_inode_ref *inode_ref,

+			int filetype);

+/**@brief Release i-node and mark it as free.

+ * @param inode_ref I-node to be released

+ * @return Error code

+ */

+int ext4_fs_free_inode(struct ext4_inode_ref *inode_ref);

+/**@brief Truncate i-node data blocks.

+ * @param inode_ref I-node to be truncated

+ * @param new_size  New size of inode (must be < current size)

+ * @return Error code

+ */

+int ext4_fs_truncate_inode(struct ext4_inode_ref *inode_ref, u64int new_size);

+/**@brief Compute 'goal' for inode index

+ * @param inode_ref Reference to inode, to allocate block for

+ * @return goal

+ */

+ext4_fsblk_t ext4_fs_inode_to_goal_block(struct ext4_inode_ref *inode_ref);

+/**@brief Compute 'goal' for allocation algorithm (For blockmap).

+ * @param inode_ref Reference to inode, to allocate block for

+ * @param goal

+ * @return error code

+ */

+int ext4_fs_indirect_find_goal(struct ext4_inode_ref *inode_ref,

+				ext4_fsblk_t *goal);

+/**@brief Get physical block address by logical index of the block.

+ * @param inode_ref I-node to read block address from

+ * @param iblock            Logical index of block

+ * @param fblock            Output pointer for return physical

+ *                          block address

+ * @param support_unwritten Indicate whether unwritten block range

+ *                          is supported under the current context

+ * @return Error code

+ */

+int ext4_fs_get_inode_dblk_idx(struct ext4_inode_ref *inode_ref,

+				 ext4_lblk_t iblock, ext4_fsblk_t *fblock,

+				 bool support_unwritten);

+/**@brief Initialize a part of unwritten range of the inode.

+ * @param inode_ref I-node to proceed on.

+ * @param iblock    Logical index of block

+ * @param fblock    Output pointer for return physical block address

+ * @return Error code

+ */

+int ext4_fs_init_inode_dblk_idx(struct ext4_inode_ref *inode_ref,

+				  ext4_lblk_t iblock, ext4_fsblk_t *fblock);

+/**@brief Append following logical block to the i-node.

+ * @param inode_ref I-node to append block to

+ * @param fblock    Output physical block address of newly allocated block

+ * @param iblock    Output logical number of newly allocated block

+ * @return Error code

+ */

+int ext4_fs_append_inode_dblk(struct ext4_inode_ref *inode_ref,

+			      ext4_fsblk_t *fblock, ext4_lblk_t *iblock);

+/**@brief   Increment inode link count.

+ * @param   inode_ref inode handle

+ */

+void ext4_fs_inode_links_count_inc(struct ext4_inode_ref *inode_ref);

+/**@brief   Decrement inode link count.

+ * @param   inode_ref inode handle

+ */

+void ext4_fs_inode_links_count_dec(struct ext4_inode_ref *inode_ref);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_hash.h

@@ -1,0 +1,22 @@

+#pragma once

+#include "ext4_config.h"

+struct ext4_hash_info {	/* Parameters and results of a directory entry name hash */

+	u32int hash;	/* hash value (presumably the major hash -- TODO confirm) */

+	u32int minor_hash;	/* minor hash value */

+	u32int hash_version;	/* hash version */

+	const u32int *seed;	/* hash seed; not modified through this pointer */

+};

+/**@brief   Directory entry name hash function.

+ * @param   name entry name

+ * @param   len entry name length

+ * @param   hash_seed hash seed (from superblock)

+ * @param   hash_version hash version (from superblock)

+ * @param   hash_major output value

+ * @param   hash_minor output value

+ * @return  standard error code*/

+int ext2_htree_hash(const char *name, int len, const u32int *hash_seed,

+		    int hash_version, u32int *hash_major,

+		    u32int *hash_minor);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_ialloc.h

@@ -1,0 +1,29 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+/**@brief Calculate and set checksum of inode bitmap.

+ * @param sb superblock pointer.

+ * @param bg block group

+ * @param bitmap bitmap buffer

+ */

+void ext4_ialloc_set_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg,

+				 void *bitmap);

+/**@brief Free i-node number and modify filesystem data structures.

+ * @param fs     Filesystem, where the i-node is located

+ * @param index  Index of i-node to be released

+ * @param is_dir Flag indicating whether the i-node is a directory or not

+ */

+int ext4_ialloc_free_inode(struct ext4_fs *fs, u32int index, bool is_dir);

+/**@brief I-node allocation algorithm.

+ * This is a simpler algorithm than the Orlov allocator used

+ * in the Linux kernel.

+ * @param fs     Filesystem to allocate i-node on

+ * @param index  Output value - allocated i-node number

+ * @param is_dir Flag if allocated i-node will be file or directory

+ * @return Error code

+ */

+int ext4_ialloc_alloc_inode(struct ext4_fs *fs, u32int *index, bool is_dir);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_inode.h

@@ -1,0 +1,304 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+#pragma incomplete struct ext4_extent_header

+/**@brief Get mode of the i-node.

+ * @param sb    Superblock

+ * @param inode I-node to load mode from

+ * @return Mode of the i-node

+ */

+u32int ext4_inode_get_mode(struct ext4_sblock *sb, struct ext4_inode *inode);

+/**@brief Set mode of the i-node.

+ * @param sb    Superblock

+ * @param inode I-node to set mode to

+ * @param mode  Mode to set to i-node

+ */

+void ext4_inode_set_mode(struct ext4_sblock *sb, struct ext4_inode *inode,

+			 u32int mode);

+/**@brief Get ID of the i-node owner (user id).

+ * @param inode I-node to load uid from

+ * @return User ID of the i-node owner

+ */

+u32int ext4_inode_get_uid(struct ext4_inode *inode);

+/**@brief Set ID of the i-node owner.

+ * @param inode I-node to set uid to

+ * @param uid   ID of the i-node owner

+ */

+void ext4_inode_set_uid(struct ext4_inode *inode, u32int uid);

+/**@brief Get real i-node size.

+ * @param sb    Superblock

+ * @param inode I-node to load size from

+ * @return Real size of i-node

+ */

+u64int ext4_inode_get_size(struct ext4_sblock *sb, struct ext4_inode *inode);

+/**@brief Set real i-node size.

+ * @param inode I-node to set size to

+ * @param size  Size of the i-node

+ */

+void ext4_inode_set_size(struct ext4_inode *inode, u64int size);

+/**@brief Get time, when i-node was last accessed.

+ * @param inode I-node

+ * @return Time of the last access (POSIX)

+ */

+u32int ext4_inode_get_access_time(struct ext4_inode *inode);

+/**@brief Set time, when i-node was last accessed.

+ * @param inode I-node

+ * @param time  Time of the last access (POSIX)

+ */

+void ext4_inode_set_access_time(struct ext4_inode *inode, u32int time);

+/**@brief Get time, when i-node was last changed.

+ * @param inode I-node

+ * @return Time of the last change (POSIX)

+ */

+u32int ext4_inode_get_change_inode_time(struct ext4_inode *inode);

+/**@brief Set time, when i-node was last changed.

+ * @param inode I-node

+ * @param time  Time of the last change (POSIX)

+ */

+void ext4_inode_set_change_inode_time(struct ext4_inode *inode, u32int time);

+/**@brief Get time, when i-node content was last modified.

+ * @param inode I-node

+ * @return Time of the last content modification (POSIX)

+ */

+u32int ext4_inode_get_modif_time(struct ext4_inode *inode);

+/**@brief Set time, when i-node content was last modified.

+ * @param inode I-node

+ * @param time  Time of the last content modification (POSIX)

+ */

+void ext4_inode_set_modif_time(struct ext4_inode *inode, u32int time);

+/**@brief Get time, when i-node was deleted.

+ * @param inode I-node

+ * @return Time of the delete action (POSIX)

+ */

+u32int ext4_inode_get_del_time(struct ext4_inode *inode);

+/**@brief Get time, when i-node was created.

+ * @param inode I-node

+ * @return Time of the create action (POSIX)

+ */

+u32int ext4_inode_get_creation_time(struct ext4_inode *inode);

+/**@brief Set time, when i-node was deleted.

+ * @param inode I-node

+ * @param time  Time of the delete action (POSIX)

+ */

+void ext4_inode_set_del_time(struct ext4_inode *inode, u32int time);

+/**@brief Get ID of the i-node owner's group.

+ * @param inode I-node to load gid from

+ * @return Group ID of the i-node owner

+ */

+u32int ext4_inode_get_gid(struct ext4_inode *inode);

+/**@brief Set ID to the i-node owner's group.

+ * @param inode I-node to set gid to

+ * @param gid   Group ID of the i-node owner

+ */

+void ext4_inode_set_gid(struct ext4_inode *inode, u32int gid);

+/**@brief Get number of links to i-node.

+ * @param inode I-node to load number of links from

+ * @return Number of links to i-node

+ */

+u16int ext4_inode_get_links_cnt(struct ext4_inode *inode);

+/**@brief Set number of links to i-node.

+ * @param inode I-node to set number of links to

+ * @param cnt   Number of links to i-node

+ */

+void ext4_inode_set_links_cnt(struct ext4_inode *inode, u16int cnt);

+/**@brief Get number of 512-bytes blocks used for i-node.

+ * @param sb    Superblock

+ * @param inode I-node

+ * @return Number of 512-bytes blocks

+ */

+u64int ext4_inode_get_blocks_count(struct ext4_sblock *sb,

+				     struct ext4_inode *inode);

+/**@brief Set number of 512-bytes blocks used for i-node.

+ * @param sb    Superblock

+ * @param inode I-node

+ * @param cnt   Number of 512-bytes blocks

+ * @return Error code

+ */

+int ext4_inode_set_blocks_count(struct ext4_sblock *sb,

+				struct ext4_inode *inode, u64int cnt);

+/**@brief Get flags (features) of i-node.

+ * @param inode I-node to get flags from

+ * @return Flags (bitmap)

+ */

+u32int ext4_inode_get_flags(struct ext4_inode *inode);

+/**@brief Set flags (features) of i-node.

+ * @param inode I-node to set flags to

+ * @param flags Flags to set to i-node

+ */

+void ext4_inode_set_flags(struct ext4_inode *inode, u32int flags);

+/**@brief Get file generation (used by NFS).

+ * @param inode I-node

+ * @return File generation

+ */

+u32int ext4_inode_get_generation(struct ext4_inode *inode);

+/**@brief Set file generation (used by NFS).

+ * @param inode      I-node

+ * @param gen        File generation

+ */

+void ext4_inode_set_generation(struct ext4_inode *inode, u32int gen);

+/**@brief Get extra I-node size field.

+ * @param sb         Superblock

+ * @param inode      I-node

+ * @return extra I-node size

+ */

+u16int ext4_inode_get_extra_isize(struct ext4_sblock *sb,

+				    struct ext4_inode *inode);

+/**@brief Set extra I-node size field.

+ * @param sb         Superblock

+ * @param inode      I-node

+ * @param size       extra I-node size

+ */

+void ext4_inode_set_extra_isize(struct ext4_sblock *sb,

+				struct ext4_inode *inode,

+				u16int size);

+/**@brief Get address of block, where are extended attributes located.

+ * @param inode I-node

+ * @param sb    Superblock

+ * @return Block address

+ */

+u64int ext4_inode_get_file_acl(struct ext4_inode *inode,

+				 struct ext4_sblock *sb);

+/**@brief Set address of block, where are extended attributes located.

+ * @param inode    I-node

+ * @param sb       Superblock

+ * @param acl      Block address

+ */

+void ext4_inode_set_file_acl(struct ext4_inode *inode, struct ext4_sblock *sb,

+			     u64int acl);

+/**@brief Get block address of specified direct block.

+ * @param inode I-node to load block from

+ * @param idx   Index of logical block

+ * @return Physical block address

+ */

+u32int ext4_inode_get_direct_block(struct ext4_inode *inode, u32int idx);

+/**@brief Set block address of specified direct block.

+ * @param inode  I-node to set block address to

+ * @param idx    Index of logical block

+ * @param block  Physical block address

+ */

+void ext4_inode_set_direct_block(struct ext4_inode *inode, u32int idx,

+				 u32int block);

+/**@brief Get block address of specified indirect block.

+ * @param inode I-node to get block address from

+ * @param idx   Index of indirect block

+ * @return Physical block address

+ */

+u32int ext4_inode_get_indirect_block(struct ext4_inode *inode, u32int idx);

+/**@brief Set block address of specified indirect block.

+ * @param inode  I-node to set block address to

+ * @param idx    Index of indirect block

+ * @param block  Physical block address

+ */

+void ext4_inode_set_indirect_block(struct ext4_inode *inode, u32int idx,

+				   u32int block);

+/**@brief Get device number

+ * @param inode  I-node to get device number from

+ * @return Device number

+ */

+u32int ext4_inode_get_dev(struct ext4_inode *inode);

+/**@brief Set device number

+ * @param inode  I-node to set device number to

+ * @param dev    Device number

+ */

+void ext4_inode_set_dev(struct ext4_inode *inode, u32int dev);

+/**@brief return the type of i-node

+ * @param sb    Superblock

+ * @param inode I-node to return the type of

+ * @return Type of the i-node

+ */

+u32int ext4_inode_type(struct ext4_sblock *sb, struct ext4_inode *inode);

+/**@brief Check if i-node has specified type.

+ * @param sb    Superblock

+ * @param inode I-node to check type of

+ * @param type  Type to check

+ * @return Result of check operation

+ */

+bool ext4_inode_is_type(struct ext4_sblock *sb, struct ext4_inode *inode,

+			u32int type);

+/**@brief Check if i-node has specified flag.

+ * @param inode I-node to check flags of

+ * @param f     Flag to check

+ * @return Result of check operation

+ */

+bool ext4_inode_has_flag(struct ext4_inode *inode, u32int f);

+/**@brief Remove specified flag from i-node.

+ * @param inode      I-node to clear flag on

+ * @param f          Flag to be cleared

+ */

+void ext4_inode_clear_flag(struct ext4_inode *inode, u32int f);

+/**@brief Set specified flag to i-node.

+ * @param inode    I-node to set flag on

+ * @param f        Flag to be set

+ */

+void ext4_inode_set_flag(struct ext4_inode *inode, u32int f);

+/**@brief Get inode checksum(crc32)

+ * @param sb    Superblock

+ * @param inode I-node to get checksum value from

+ */

+u32int

+ext4_inode_get_csum(struct ext4_sblock *sb, struct ext4_inode *inode);

+/**@brief Set inode checksum(crc32)

+ * @param sb       Superblock

+ * @param inode    I-node to set checksum value to

+ * @param checksum Checksum value to store

+ */

+void

+ext4_inode_set_csum(struct ext4_sblock *sb, struct ext4_inode *inode,

+			u32int checksum);

+/**@brief Check if i-node can be truncated.

+ * @param sb    Superblock

+ * @param inode I-node to check

+ * @return Result of the check operation

+ */

+bool ext4_inode_can_truncate(struct ext4_sblock *sb, struct ext4_inode *inode);

+/**@brief Get extent header from the root of the extent tree.

+ * @param inode I-node to get extent header from

+ * @return Pointer to extent header of the root node

+ */

+struct ext4_extent_header *

+ext4_inode_get_extent_header(struct ext4_inode *inode);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_journal.h

@@ -1,0 +1,97 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "queue.h"

+#include "tree.h"

+struct jbd_fs {	/* Journal filesystem handle used by jbd_get_fs/jbd_put_fs/jbd_recover */

+	struct ext4_blockdev *bdev;	/* block device */

+	struct ext4_inode_ref inode_ref;	/* i-node reference (presumably the journal i-node -- TODO confirm) */

+	struct jbd_sb sb;	/* journal superblock, stored by value */

+	bool dirty;	/* dirty flag */

+};

+struct jbd_buf {	/* One buffer attached to a journal transaction */

+	u32int jbd_lba;	/* presumably the block position inside the journal -- TODO confirm */

+	struct ext4_block block;	/* block descriptor */

+	struct jbd_trans *trans;	/* transaction this buffer points back to */

+	struct jbd_block_rec *block_rec;	/* associated block record */

+	TAILQ_ENTRY(jbd_buf) buf_node;	/* tail-queue linkage (presumably for jbd_trans.buf_queue) */

+	TAILQ_ENTRY(jbd_buf) dirty_buf_node;	/* tail-queue linkage (presumably for jbd_block_rec.dirty_buf_queue) */

+};

+struct jbd_revoke_rec {	/* Node type of the jbd_revoke_tree red-black tree */

+	ext4_fsblk_t lba;	/* block address of the record */

+	RB_ENTRY(jbd_revoke_rec) revoke_node;	/* red-black tree linkage */

+};

+struct jbd_block_rec {	/* Per-block record; node type of the jbd_block red-black tree */

+	ext4_fsblk_t lba;	/* block address of the record */

+	struct jbd_trans *trans;	/* transaction this record points back to */

+	RB_ENTRY(jbd_block_rec) block_rec_node;	/* red-black tree linkage */

+	LIST_ENTRY(jbd_block_rec) tbrec_node;	/* list linkage (presumably for jbd_trans.tbrec_list) */

+	TAILQ_HEAD(jbd_buf_dirty, jbd_buf) dirty_buf_queue;	/* tail queue of jbd_buf entries */

+};

+struct jbd_trans {	/* One journal transaction; created by jbd_journal_new_trans() */

+	u32int trans_id;	/* transaction identifier */

+	u32int start_iblock;	/* presumably the first journal block of the transaction -- TODO confirm */

+	int alloc_blocks;	/* NOTE(review): looks like a count of allocated journal blocks -- confirm */

+	int data_cnt;	/* NOTE(review): looks like a count of data blocks -- confirm */

+	u32int data_csum;	/* checksum value (presumably over the data blocks -- TODO confirm) */

+	int written_cnt;	/* NOTE(review): looks like a count of blocks already written -- confirm */

+	int error;	/* error code recorded for the transaction */

+	struct jbd_journal *journal;	/* journal this transaction points back to */

+	TAILQ_HEAD(jbd_trans_buf, jbd_buf) buf_queue;	/* tail queue of jbd_buf entries */

+	RB_HEAD(jbd_revoke_tree, jbd_revoke_rec) revoke_root;	/* red-black tree of jbd_revoke_rec */

+	LIST_HEAD(jbd_trans_block_rec, jbd_block_rec) tbrec_list;	/* list of jbd_block_rec */

+	TAILQ_ENTRY(jbd_trans) trans_node;	/* tail-queue linkage (presumably for jbd_journal.cp_queue) */

+};

+struct jbd_journal {	/* Running journal state; used by jbd_journal_start/jbd_journal_stop */

+	u32int first;	/* presumably the first usable journal block -- TODO confirm */

+	u32int start;	/* presumably the current start position of the log -- TODO confirm */

+	u32int last;	/* presumably the last used journal block -- TODO confirm */

+	u32int trans_id;	/* transaction identifier */

+	u32int alloc_trans_id;	/* presumably the next identifier to hand out -- TODO confirm */

+	u32int block_size;	/* block size */

+	TAILQ_HEAD(jbd_cp_queue, jbd_trans) cp_queue;	/* tail queue of jbd_trans (see jbd_journal_purge_cp_trans) */

+	RB_HEAD(jbd_block, jbd_block_rec) block_rec_root;	/* red-black tree of jbd_block_rec */

+	struct jbd_fs *jbd_fs;	/* journal filesystem handle */

+};

+int jbd_get_fs(struct ext4_fs *fs,

+	       struct jbd_fs *jbd_fs);

+int jbd_put_fs(struct jbd_fs *jbd_fs);

+int jbd_inode_bmap(struct jbd_fs *jbd_fs,

+		   ext4_lblk_t iblock,

+		   ext4_fsblk_t *fblock);

+int jbd_recover(struct jbd_fs *jbd_fs);

+int jbd_journal_start(struct jbd_fs *jbd_fs,

+		      struct jbd_journal *journal);

+int jbd_journal_stop(struct jbd_journal *journal);

+struct jbd_trans *

+jbd_journal_new_trans(struct jbd_journal *journal);

+int jbd_trans_set_block_dirty(struct jbd_trans *trans,

+			      struct ext4_block *block);

+int jbd_trans_revoke_block(struct jbd_trans *trans,

+			   ext4_fsblk_t lba);

+int jbd_trans_try_revoke_block(struct jbd_trans *trans,

+			       ext4_fsblk_t lba);

+void jbd_journal_free_trans(struct jbd_journal *journal,

+			    struct jbd_trans *trans,

+			    bool abort);

+int jbd_journal_commit_trans(struct jbd_journal *journal,

+			     struct jbd_trans *trans);

+void

+jbd_journal_purge_cp_trans(struct jbd_journal *journal,

+			   bool flush,

+			   bool once);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_mbr.h

@@ -1,0 +1,22 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_blockdev.h"

+/**@brief Master boot record block devices descriptor*/

+struct ext4_mbr_bdevs {	/* Output of ext4_mbr_scan() */

+	struct ext4_blockdev partitions[4];	/* one block device descriptor per partition slot */

+};

+int ext4_mbr_scan(struct ext4_blockdev *parent, struct ext4_mbr_bdevs *bdevs);

+/**@brief Master boot record partitions*/

+struct ext4_mbr_parts {	/* Input of ext4_mbr_write() */

+	/**@brief Percentage division tab:

+	 *  - {50, 20, 10, 20}

+	 * Sum of all 4 elements must be <= 100*/

+	u8int division[4];	/* one percentage per entry, four entries */

+};

+int ext4_mbr_write(struct ext4_blockdev *parent, struct ext4_mbr_parts *parts, u32int disk_id);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_misc.h

@@ -1,0 +1,87 @@

+#pragma once

+#define EXT4_DIV_ROUND_UP(x, y) (((x) + (y) - 1)/(y))	/* x / y rounded up; evaluates y twice */

+#define EXT4_ALIGN(x, y) ((y) * EXT4_DIV_ROUND_UP((x), (y)))	/* x rounded up to a multiple of y; evaluates y several times */

+/****************************Endian conversion*****************/

+/* Reverse the byte order of a 64-bit value. */

+static inline u64int reorder64(u64int n)

+{

+	u64int swapped = 0;

+	int i;

+	/* Move the lowest byte of n to the bottom of the result, eight times */

+	for (i = 0; i < 8; i++) {

+		swapped = (swapped << 8) | (n & 0xff);

+		n >>= 8;

+	}

+	return swapped;

+}

+/* Reverse the byte order of a 32-bit value. */

+static inline u32int reorder32(u32int n)

+{

+	u32int b0 = n & 0xff;

+	u32int b1 = (n >> 8) & 0xff;

+	u32int b2 = (n >> 16) & 0xff;

+	u32int b3 = (n >> 24) & 0xff;

+	return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;

+}

+/* Swap the two bytes of a 16-bit value. */

+static inline u16int reorder16(u16int n)

+{

+	u16int low = n & 0xff;

+	u16int high = (n >> 8) & 0xff;

+	return (low << 8) | high;

+}

+#ifdef CONFIG_BIG_ENDIAN	/* little-endian conversions swap bytes, big-endian ones are identity */

+#define to_le64(_n) reorder64(_n)

+#define to_le32(_n) reorder32(_n)

+#define to_le16(_n) reorder16(_n)

+#define to_be64(_n) (_n)

+#define to_be32(_n) (_n)

+#define to_be16(_n) (_n)

+#else	/* little-endian conversions are identity, big-endian ones swap bytes */

+#define to_le64(_n) (_n)

+#define to_le32(_n) (_n)

+#define to_le16(_n) (_n)

+#define to_be64(_n) reorder64(_n)

+#define to_be32(_n) reorder32(_n)

+#define to_be16(_n) reorder16(_n)

+#endif	/* CONFIG_BIG_ENDIAN */

+/****************************Access macros to ext4 structures*****************/

+#define ext4_get32(s, f) to_le32((s)->f)	/* read 32-bit field f of *s through to_le32 */

+#define ext4_get16(s, f) to_le16((s)->f)	/* read 16-bit field f of *s through to_le16 */

+#define ext4_get8(s, f) (s)->f	/* read single-byte field f of *s as is */

+#define ext4_set32(s, f, v) \

+	do { \

+		(s)->f = to_le32(v); \

+	} while (0)	/* store v into field f of *s through to_le32 */

+#define ext4_set16(s, f, v) \

+	do { \

+		(s)->f = to_le16(v); \

+	} while (0)	/* store v into field f of *s through to_le16 */

+/* Store v into single-byte field f of *s; no byte-order conversion.

+ * The parameter list must follow the macro name directly: with a line

+ * break in between, the macro is object-like and cannot be invoked. */

+#define ext4_set8(s, f, v) \

+	do { \

+		(s)->f = (v); \

+	} while (0)

+/****************************Access macros to jbd2 structures*****************/

+#define jbd_get32(s, f) to_be32((s)->f)	/* read 32-bit field f of *s through to_be32 */

+#define jbd_get16(s, f) to_be16((s)->f)	/* read 16-bit field f of *s through to_be16 */

+#define jbd_get8(s, f) (s)->f	/* read single-byte field f of *s as is */

+#define jbd_set32(s, f, v) \

+	do { \

+		(s)->f = to_be32(v); \

+	} while (0)	/* store v into field f of *s through to_be32 */

+#define jbd_set16(s, f, v) \

+	do { \

+		(s)->f = to_be16(v); \

+	} while (0)	/* store v into field f of *s through to_be16 */

+/* Store v into single-byte field f of *s; no byte-order conversion.

+ * The parameter list must follow the macro name directly: with a line

+ * break in between, the macro is object-like and cannot be invoked. */

+#define jbd_set8(s, f, v) \

+	do { \

+		(s)->f = (v); \

+	} while (0)

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_mkfs.h

@@ -1,0 +1,49 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_blockdev.h"

+#include "ext4_fs.h"

+struct ext4_mkfs_info {	/* Filesystem parameters passed to ext4_mkfs() and filled by ext4_mkfs_read_info() */

+	u64int len;	/* presumably the filesystem length in bytes -- TODO confirm unit */

+	u32int block_size;	/* block size */

+	u32int blocks_per_group;	/* blocks per block group */

+	u32int inodes_per_group;	/* i-nodes per block group */

+	u32int inode_size;	/* i-node size */

+	u32int inodes;	/* number of i-nodes */

+	u32int journal_blocks;	/* number of journal blocks */

+	u32int feat_ro_compat;	/* read-only compatible feature flags */

+	u32int feat_compat;	/* compatible feature flags */

+	u32int feat_incompat;	/* incompatible feature flags */

+	u32int bg_desc_reserve_blocks;	/* presumably blocks reserved for group descriptors -- TODO confirm */

+	u16int dsc_size;	/* presumably the block group descriptor size -- TODO confirm */

+	u8int uuid[UUID_SIZE];	/* UUID, UUID_SIZE bytes */

+	bool journal;	/* journal flag */

+	char label[16];	/* label, 16 bytes; NOTE(review): confirm whether NUL termination is required */

+};

+struct fs_aux_info {	/* Auxiliary state set up by create_fs_aux_info() and released by release_fs_aux_info() */

+    struct ext4_sblock *sb;	/* superblock */

+    u8int *bg_desc_blk;	/* presumably a buffer for a block of group descriptors -- TODO confirm */

+    struct xattr_list_element *xattrs;	/* extended attribute list */

+    u32int first_data_block;	/* first data block */

+    u64int len_blocks;	/* presumably the filesystem length in blocks -- TODO confirm */

+    u32int inode_table_blocks;	/* presumably blocks per i-node table -- TODO confirm */

+    u32int groups;	/* number of block groups */

+    u32int bg_desc_blocks;	/* presumably blocks occupied by group descriptors -- TODO confirm */

+    u32int default_i_flags;	/* default i-node flags */

+    u32int blocks_per_ind;	/* presumably blocks addressed by one indirect block -- TODO confirm */

+    u32int blocks_per_dind;	/* presumably blocks addressed by one double-indirect block -- TODO confirm */

+    u32int blocks_per_tind;	/* presumably blocks addressed by one triple-indirect block -- TODO confirm */

+};

+int create_fs_aux_info(struct fs_aux_info *aux_info, struct ext4_mkfs_info *info);

+void release_fs_aux_info(struct fs_aux_info *aux_info);

+int write_sblocks(struct ext4_blockdev *bd, struct fs_aux_info *aux_info, struct ext4_mkfs_info *info);

+int ext4_mkfs_read_info(struct ext4_blockdev *bd, struct ext4_mkfs_info *info);

+int ext4_mkfs(struct ext4_fs *fs, struct ext4_blockdev *bd,

+	      struct ext4_mkfs_info *info, int fs_type);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_super.h

@@ -1,0 +1,185 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+#include "ext4_misc.h"

+/**@brief   Read the block count stored in the superblock.

+ *          The high and low 32-bit halves are joined into one 64-bit value.

+ * @param   s superblock descriptor

+ * @return  count of blocks*/

+static inline u64int ext4_sb_get_blocks_cnt(struct ext4_sblock *s)

+{

+	u64int hi = to_le32(s->blocks_count_hi);

+	u64int lo = to_le32(s->blocks_count_lo);

+	return (hi << 32) | lo;

+}

+/**@brief   Store the block count in the superblock.

+ *          The value is split into its low and high 32-bit halves.

+ * @param   s superblock descriptor

+ * @param   cnt new count of blocks*/

+static inline void ext4_sb_set_blocks_cnt(struct ext4_sblock *s, u64int cnt)

+{

+	u32int lo = (u32int)(cnt & 0xffffffff);

+	u32int hi = (u32int)(cnt >> 32);

+	s->blocks_count_lo = to_le32(lo);

+	s->blocks_count_hi = to_le32(hi);

+}

+/**@brief   Read the free block count stored in the superblock.

+ *          The high and low 32-bit halves are joined into one 64-bit value.

+ * @param   s superblock descriptor

+ * @return  free blocks*/

+static inline u64int ext4_sb_get_free_blocks_cnt(struct ext4_sblock *s)

+{

+	u64int hi = to_le32(s->free_blocks_count_hi);

+	u64int lo = to_le32(s->free_blocks_count_lo);

+	return (hi << 32) | lo;

+}

+/**@brief   Store the free block count in the superblock.

+ *          The value is split into its low and high 32-bit halves.

+ * @param   s superblock descriptor

+ * @param   cnt new value of free blocks*/

+static inline void ext4_sb_set_free_blocks_cnt(struct ext4_sblock *s,

+					       u64int cnt)

+{

+	u32int lo = (u32int)(cnt & 0xffffffff);

+	u32int hi = (u32int)(cnt >> 32);

+	s->free_blocks_count_lo = to_le32(lo);

+	s->free_blocks_count_hi = to_le32(hi);

+}

+/**@brief   Compute the block size from the superblock.

+ *          The size is 1024 shifted left by the stored log_block_size.

+ * @param   s superblock descriptor

+ * @return  block size in bytes*/

+static inline u32int ext4_sb_get_block_size(struct ext4_sblock *s)

+{

+	u32int shift = to_le32(s->log_block_size);

+	return 1024 << shift;

+}

+/**@brief   Read the block group descriptor size.

+ *          Values below EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE are raised

+ *          to that minimum.

+ * @param   s superblock descriptor

+ * @return  block group descriptor size in bytes*/

+static inline u16int ext4_sb_get_desc_size(struct ext4_sblock *s)

+{

+	u16int size = to_le16(s->desc_size);

+	if (size < EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)

+		return EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE;

+	return size;

+}

+/*************************Flags and features*********************************/

+/**@brief   Test bits of the superblock flags field.

+ *          NOTE(review): the masked value is returned through conversion

+ *          to bool; verify that bool is a true boolean type here.

+ * @param   s superblock descriptor

+ * @param   v flag to check

+ * @return  true if any bit of v is set in the flags field*/

+static inline bool ext4_sb_check_flag(struct ext4_sblock *s, u32int v)

+{

+	u32int masked = to_le32(s->flags) & v;

+	return masked;

+}

+/**@brief   Test bits of the compatible feature field.

+ * @param   s superblock descriptor

+ * @param   v feature to check

+ * @return  true if any bit of v is set in features_compatible*/

+static inline bool ext4_sb_feature_com(struct ext4_sblock *s, u32int v)

+{

+	u32int masked = to_le32(s->features_compatible) & v;

+	return masked;

+}

+/**@brief   Test bits of the incompatible feature field.

+ * @param   s superblock descriptor

+ * @param   v feature to check

+ * @return  true if any bit of v is set in features_incompatible*/

+static inline bool ext4_sb_feature_incom(struct ext4_sblock *s, u32int v)

+{

+	u32int masked = to_le32(s->features_incompatible) & v;

+	return masked;

+}

+/**@brief   Test bits of the read-only compatible feature field.

+ * @param   s superblock descriptor

+ * @param   v flag to check

+ * @return  true if any bit of v is set in features_read_only*/

+static inline bool ext4_sb_feature_ro_com(struct ext4_sblock *s, u32int v)

+{

+	u32int masked = to_le32(s->features_read_only) & v;

+	return masked;

+}

+/**@brief   Map a block group to its flex group.

+ *          The group number is shifted right by log_groups_per_flex.

+ * @param   s superblock descriptor

+ * @param   block_group block group

+ * @return  flex group id*/

+static inline u32int ext4_sb_bg_to_flex(struct ext4_sblock *s,

+					  u32int block_group)

+{

+	u32int shift = to_le32(s->log_groups_per_flex);

+	return block_group >> shift;

+}

+/**@brief   Compute the flex block group size.

+ *          The size is 1 shifted left by log_groups_per_flex.

+ * @param   s superblock descriptor

+ * @return  flex bg size*/

+static inline u32int ext4_sb_flex_bg_size(struct ext4_sblock *s)

+{

+	u32int shift = to_le32(s->log_groups_per_flex);

+	return 1 << shift;

+}

+/**@brief   Read the first meta block group id from the superblock.

+ * @param   s superblock descriptor

+ * @return  first meta_bg id */

+static inline u32int ext4_sb_first_meta_bg(struct ext4_sblock *s)

+{

+	u32int id = to_le32(s->first_meta_bg);

+	return id;

+}

+/**************************More complex functions****************************/

+/**@brief   Returns a block group count.

+ * @param   s superblock descriptor

+ * @return  count of block groups*/

+u32int ext4_block_group_cnt(struct ext4_sblock *s);

+/**@brief   Returns block count in block group

+ *          (last block group may have less blocks)

+ * @param   s superblock descriptor

+ * @param   bgid block group id

+ * @return  blocks count*/

+u32int ext4_blocks_in_group_cnt(struct ext4_sblock *s, u32int bgid);

+/**@brief   Returns inodes count in block group

+ *          (last block group may have less inodes)

+ * @param   s superblock descriptor

+ * @param   bgid block group id

+ * @return  inodes count*/

+u32int ext4_inodes_in_group_cnt(struct ext4_sblock *s, u32int bgid);

+/***************************Read/write/check superblock**********************/

+/**@brief   Superblock write.

+ * @param   bdev block device descriptor.

+ * @param   s superblock descriptor

+ * @return  Standard error code */

+int ext4_sb_write(struct ext4_blockdev *bdev, struct ext4_sblock *s);

+/**@brief   Superblock read.

+ * @param   bdev block device descriptor.

+ * @param   s superblock descriptor

+ * @return  Standard error code */

+int ext4_sb_read(struct ext4_blockdev *bdev, struct ext4_sblock *s);

+/**@brief   Superblock simple validation.

+ * @param   s superblock descriptor

+ * @return  true if OK*/

+bool ext4_sb_check(struct ext4_sblock *s);

+/**@brief   Superblock presence in block group.

+ * @param   s superblock descriptor

+ * @param   block_group block group id

+ * @return  true if block group has superblock*/

+bool ext4_sb_is_super_in_bg(struct ext4_sblock *s, u32int block_group);

+/**@brief   TODO:*/

+bool ext4_sb_sparse(u32int group);

+/**@brief   TODO:*/

+u32int ext4_bg_num_gdb(struct ext4_sblock *s, u32int group);

+/**@brief   TODO:*/

+u32int ext4_num_base_meta_clusters(struct ext4_sblock *s,

+				     u32int block_group);

+/**@brief   TODO:*/

+void ext4_sb_set_csum(struct ext4_sblock *s);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_trans.h

@@ -1,0 +1,38 @@

+#pragma once

+#include "ext4_config.h"

+#include "ext4_types.h"

+/**@brief   Mark a buffer dirty and add it to the current transaction.

+ * @param   buf buffer

+ * @return  standard error code*/

+int ext4_trans_set_block_dirty(struct ext4_buf *buf);

+/**@brief   Block get function (through cache, don't read).

+ *          jbd_trans_get_access would be called in order to

+ *          get write access to the buffer.

+ * @param   bdev block device descriptor

+ * @param   b block descriptor

+ * @param   lba logical block address

+ * @return  standard error code*/

+int ext4_trans_block_get_noread(struct ext4_blockdev *bdev,

+			  struct ext4_block *b,

+			  u64int lba);

+/**@brief   Block get function (through cache).

+ *          jbd_trans_get_access would be called in order to

+ *          get write access to the buffer.

+ * @param   bdev block device descriptor

+ * @param   b block descriptor

+ * @param   lba logical block address

+ * @return  standard error code*/

+int ext4_trans_block_get(struct ext4_blockdev *bdev,

+		   struct ext4_block *b,

+		   u64int lba);

+/**@brief  Try to add block to be revoked to the current transaction.

+ * @param  bdev block device descriptor

+ * @param  lba logical block address

+ * @return standard error code*/

+int ext4_trans_try_revoke_block(struct ext4_blockdev *bdev,

+			       u64int lba);

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/ext4_types.h

@@ -1,0 +1,833 @@

+#pragma once

+#include "ext4_blockdev.h"

+#include "tree.h"

+/*

+ * Types of blocks.

+ */

+typedef u32int ext4_lblk_t;

+typedef u64int ext4_fsblk_t;

+#define EXT4_CHECKSUM_CRC32C 1

+#define UUID_SIZE 16

+#pragma pack on

+/*

+ * Structure of the super block

+ */

+struct ext4_sblock {

+	u32int inodes_count;		   /* I-nodes count */

+	u32int blocks_count_lo;	  /* Blocks count */

+	u32int reserved_blocks_count_lo; /* Reserved blocks count */

+	u32int free_blocks_count_lo;     /* Free blocks count */

+	u32int free_inodes_count;	/* Free inodes count */

+	u32int first_data_block;	 /* First Data Block */

+	u32int log_block_size;	   /* Block size */

+	u32int log_cluster_size;	 /* Obsoleted fragment size */

+	u32int blocks_per_group;	 /* Number of blocks per group */

+	u32int frags_per_group;	  /* Obsoleted fragments per group */

+	u32int inodes_per_group;	 /* Number of inodes per group */

+	u32int mount_time;		   /* Mount time */

+	u32int write_time;		   /* Write time */

+	u16int mount_count;		   /* Mount count */

+	u16int max_mount_count;	  /* Maximal mount count */

+	u16int magic;			   /* Magic signature */

+	u16int state;			   /* File system state */

+	u16int errors;		   /* Behavior when detecting errors */

+	u16int minor_rev_level;	  /* Minor revision level */

+	u32int last_check_time;	  /* Time of last check */

+	u32int check_interval;	   /* Maximum time between checks */

+	u32int creator_os;		   /* Creator OS */

+	u32int rev_level;		   /* Revision level */

+	u16int def_resuid;		   /* Default uid for reserved blocks */

+	u16int def_resgid;		   /* Default gid for reserved blocks */

+	/* Fields for EXT4_DYNAMIC_REV superblocks only. */

+	u32int first_inode;	 /* First non-reserved inode */

+	u16int inode_size;	  /* Size of inode structure */

+	u16int block_group_index;   /* Block group index of this superblock */

+	u32int features_compatible; /* Compatible feature set */

+	u32int features_incompatible;  /* Incompatible feature set */

+	u32int features_read_only;     /* Readonly-compatible feature set */

+	u8int uuid[UUID_SIZE];		 /* 128-bit uuid for volume */

+	char volume_name[16];		 /* Volume name */

+	char last_mounted[64];		 /* Directory where last mounted */

+	u32int algorithm_usage_bitmap; /* For compression */

+	/*

+	 * Performance hints. Directory preallocation should only

+	 * happen if the EXT4_FCOM_DIR_PREALLOC flag is on.

+	 */

+	u8int s_prealloc_blocks; /* Number of blocks to try to preallocate */

+	u8int s_prealloc_dir_blocks;  /* Number to preallocate for dirs */

+	u16int s_reserved_gdt_blocks; /* Per group desc for online growth */

+	/*

+	 * Journaling support valid if EXT4_FCOM_HAS_JOURNAL is set.

+	 */

+	u8int journal_uuid[UUID_SIZE];      /* UUID of journal superblock */

+	u32int journal_inode_number; /* Inode number of journal file */

+	u32int journal_dev;	  /* Device number of journal file */

+	u32int last_orphan;	  /* Head of list of inodes to delete */

+	u32int hash_seed[4];	 /* HTREE hash seed */

+	u8int default_hash_version;  /* Default hash version to use */

+	u8int journal_backup_type;

+	u16int desc_size;	  /* Size of group descriptor */

+	u32int default_mount_opts; /* Default mount options */

+	u32int first_meta_bg;      /* First metablock block group */

+	u32int mkfs_time;	  /* When the filesystem was created */

+	u32int journal_blocks[17]; /* Backup of the journal inode */

+	/* 64bit support valid if EXT4_FINCOM_64BIT is set */

+	u32int blocks_count_hi;	  /* Blocks count */

+	u32int reserved_blocks_count_hi; /* Reserved blocks count */

+	u32int free_blocks_count_hi;     /* Free blocks count */

+	u16int min_extra_isize;    /* All inodes have at least # bytes */

+	u16int want_extra_isize;   /* New inodes should reserve # bytes */

+	u32int flags;		     /* Miscellaneous flags */

+	u16int raid_stride;	/* RAID stride */

+	u16int mmp_interval;       /* # seconds to wait in MMP checking */

+	u64int mmp_block;	  /* Block for multi-mount protection */

+	u32int raid_stripe_width;  /* Blocks on all data disks (N * stride) */

+	u8int log_groups_per_flex; /* FLEX_BG group size */

+	u8int checksum_type;

+	u16int reserved_pad;

+	u64int kbytes_written; /* Number of lifetime kilobytes written */

+	u32int snapshot_inum;  /* I-node number of active snapshot */

+	u32int snapshot_id;    /* Sequential ID of active snapshot */

+	u64int

+	    snapshot_r_blocks_count; /* Reserved blocks for active snapshot's

+					future use */

+	u32int

+	    snapshot_list; /* I-node number of the head of the on-disk snapshot

+			      list */

+	u32int error_count;	 /* Number of file system errors */

+	u32int first_error_time;    /* First time an error happened */

+	u32int first_error_ino;     /* I-node involved in first error */

+	u64int first_error_block;   /* Block involved of first error */

+	u8int first_error_func[32]; /* Function where the error happened */

+	u32int first_error_line;    /* Line number where error happened */

+	u32int last_error_time;     /* Most recent time of an error */

+	u32int last_error_ino;      /* I-node involved in last error */

+	u32int last_error_line;     /* Line number where error happened */

+	u64int last_error_block;    /* Block involved of last error */

+	u8int last_error_func[32];  /* Function where the error happened */

+	u8int mount_opts[64];

+	u32int usr_quota_inum;	/* inode for tracking user quota */

+	u32int grp_quota_inum;	/* inode for tracking group quota */

+	u32int overhead_clusters;	/* overhead blocks/clusters in fs */

+	u32int backup_bgs[2];	/* groups with sparse_super2 SBs */

+	u8int  encrypt_algos[4];	/* Encryption algorithms in use  */

+	u8int  encrypt_pw_salt[16];	/* Salt used for string2key algorithm */

+	u32int lpf_ino;		/* Location of the lost+found inode */

+	u32int padding[100];	/* Padding to the end of the block */

+	u32int checksum;		/* crc32c(superblock) */

+};

+#pragma pack off

+#define EXT4_SUPERBLOCK_MAGIC 0xEF53

+#define EXT4_SUPERBLOCK_SIZE 1024

+#define EXT4_SUPERBLOCK_OFFSET 1024

+#define EXT4_SUPERBLOCK_OS_LINUX 0

+#define EXT4_SUPERBLOCK_OS_HURD 1

+/*

+ * Misc. filesystem flags

+ */

+#define EXT4_SUPERBLOCK_FLAGS_SIGNED_HASH 0x0001

+#define EXT4_SUPERBLOCK_FLAGS_UNSIGNED_HASH 0x0002

+#define EXT4_SUPERBLOCK_FLAGS_TEST_FILESYS 0x0004

+/*

+ * Filesystem states

+ */

+#define EXT4_SUPERBLOCK_STATE_VALID_FS 0x0001  /* Unmounted cleanly */

+#define EXT4_SUPERBLOCK_STATE_ERROR_FS 0x0002  /* Errors detected */

+#define EXT4_SUPERBLOCK_STATE_ORPHAN_FS 0x0004 /* Orphans being recovered */

+/*

+ * Behaviour when errors detected

+ */

+#define EXT4_SUPERBLOCK_ERRORS_CONTINUE 1 /* Continue execution */

+#define EXT4_SUPERBLOCK_ERRORS_RO 2       /* Remount fs read-only */

+#define EXT4_SUPERBLOCK_ERRORS_PANIC 3    /* Panic */

+#define EXT4_SUPERBLOCK_ERRORS_DEFAULT EXT4_SUPERBLOCK_ERRORS_CONTINUE /* Default: continue execution */

+/*

+ * Compatible features

+ */

+#define EXT4_FCOM_DIR_PREALLOC 0x0001

+#define EXT4_FCOM_IMAGIC_INODES 0x0002

+#define EXT4_FCOM_HAS_JOURNAL 0x0004

+#define EXT4_FCOM_EXT_ATTR 0x0008

+#define EXT4_FCOM_RESIZE_INODE 0x0010

+#define EXT4_FCOM_DIR_INDEX 0x0020

+/*

+ * Read-only compatible features

+ */

+#define EXT4_FRO_COM_SPARSE_SUPER 0x0001

+#define EXT4_FRO_COM_LARGE_FILE 0x0002

+#define EXT4_FRO_COM_BTREE_DIR 0x0004

+#define EXT4_FRO_COM_HUGE_FILE 0x0008

+#define EXT4_FRO_COM_GDT_CSUM 0x0010

+#define EXT4_FRO_COM_DIR_NLINK 0x0020

+#define EXT4_FRO_COM_EXTRA_ISIZE 0x0040

+#define EXT4_FRO_COM_QUOTA 0x0100

+#define EXT4_FRO_COM_BIGALLOC 0x0200

+#define EXT4_FRO_COM_METADATA_CSUM 0x0400

+/*

+ * Incompatible features

+ */

+#define EXT4_FINCOM_COMPRESSION 0x0001

+#define EXT4_FINCOM_FILETYPE 0x0002

+#define EXT4_FINCOM_RECOVER 0x0004     /* Needs recovery */

+#define EXT4_FINCOM_JOURNAL_DEV 0x0008 /* Journal device */

+#define EXT4_FINCOM_META_BG 0x0010

+#define EXT4_FINCOM_EXTENTS 0x0040 /* extents support */

+#define EXT4_FINCOM_64BIT 0x0080

+#define EXT4_FINCOM_MMP 0x0100

+#define EXT4_FINCOM_FLEX_BG 0x0200

+#define EXT4_FINCOM_EA_INODE 0x0400	 /* EA in inode */

+#define EXT4_FINCOM_DIRDATA 0x1000	  /* data in dirent */

+#define EXT4_FINCOM_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */

+#define EXT4_FINCOM_LARGEDIR 0x4000	 /* >2GB or 3-lvl htree */

+#define EXT4_FINCOM_INLINE_DATA 0x8000      /* data in inode */

+/*

+ * EXT2 supported feature set

+ */

+#define EXT2_SUPPORTED_FCOM 0x0000

+#define EXT2_SUPPORTED_FINCOM                                   \

+	(EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG)

+#define EXT2_SUPPORTED_FRO_COM                                  \

+	(EXT4_FRO_COM_SPARSE_SUPER |                            \

+	 EXT4_FRO_COM_LARGE_FILE)

+/*

+ * EXT3 supported feature set

+ */

+#define EXT3_SUPPORTED_FCOM (EXT4_FCOM_DIR_INDEX)

+#define EXT3_SUPPORTED_FINCOM                                 \

+	(EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG)

+#define EXT3_SUPPORTED_FRO_COM                                \

+	(EXT4_FRO_COM_SPARSE_SUPER | EXT4_FRO_COM_LARGE_FILE)

+/*

+ * EXT4 supported feature set

+ */

+#define EXT4_SUPPORTED_FCOM (EXT4_FCOM_DIR_INDEX)

+#define EXT4_SUPPORTED_FINCOM ( \

+	EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG |      \

+	EXT4_FINCOM_EXTENTS | EXT4_FINCOM_FLEX_BG |       \

+	EXT4_FINCOM_64BIT \

+)

+#define EXT4_SUPPORTED_FRO_COM ( \

+	EXT4_FRO_COM_SPARSE_SUPER |                       \

+	EXT4_FRO_COM_METADATA_CSUM |                      \

+	EXT4_FRO_COM_LARGE_FILE | EXT4_FRO_COM_GDT_CSUM | \

+	EXT4_FRO_COM_DIR_NLINK |                          \

+	EXT4_FRO_COM_EXTRA_ISIZE | EXT4_FRO_COM_HUGE_FILE \

+)

+/*Ignored features:

+ * RECOVER - journaling in lwext4 is not supported

+ *           (probably won't be ever...)

+ * MMP - multi-mount protection (impossible scenario)

+ * */

+#define EXT_FINCOM_IGNORED ( \

+	EXT4_FINCOM_RECOVER | \

+	EXT4_FINCOM_MMP | \

+	EXT4_FINCOM_BG_USE_META_CSUM \

+)

+/*

+// TODO: Features incompatible to implement

+#define EXT4_SUPPORTED_FINCOM

+                     (EXT4_FINCOM_INLINE_DATA)

+// TODO: Features read only to implement

+#define EXT4_SUPPORTED_FRO_COM

+                     EXT4_FRO_COM_BIGALLOC |\

+                     EXT4_FRO_COM_QUOTA)

+*/

+/* Inode table/bitmap not in use */

+#define EXT4_BLOCK_GROUP_INODE_UNINIT 0x0001

+/* Block bitmap not in use */

+#define EXT4_BLOCK_GROUP_BLOCK_UNINIT 0x0002

+/* On-disk itable initialized to zero */

+#define EXT4_BLOCK_GROUP_ITABLE_ZEROED 0x0004

+#pragma pack on

+/*

+ * Structure of a blocks group descriptor

+ */

+struct ext4_bgroup {

+	u32int block_bitmap_lo;	    /* Blocks bitmap block */

+	u32int inode_bitmap_lo;	    /* Inodes bitmap block */

+	u32int inode_table_first_block_lo; /* Inodes table block */

+	u16int free_blocks_count_lo;       /* Free blocks count */

+	u16int free_inodes_count_lo;       /* Free inodes count */

+	u16int used_dirs_count_lo;	 /* Directories count */

+	u16int flags;		       /* EXT4_BG_flags (INODE_UNINIT, etc) */

+	u32int exclude_bitmap_lo;    /* Exclude bitmap for snapshots */

+	u16int block_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+bbitmap) LE */

+	u16int inode_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+ibitmap) LE */

+	u16int itable_unused_lo;     /* Unused inodes count */

+	u16int checksum;	     /* crc16(sb_uuid+group+desc) */

+	u32int block_bitmap_hi;	    /* Blocks bitmap block MSB */

+	u32int inode_bitmap_hi;	    /* I-nodes bitmap block MSB */

+	u32int inode_table_first_block_hi; /* I-nodes table block MSB */

+	u16int free_blocks_count_hi;       /* Free blocks count MSB */

+	u16int free_inodes_count_hi;       /* Free i-nodes count MSB */

+	u16int used_dirs_count_hi;	 /* Directories count MSB */

+	u16int itable_unused_hi;	   /* Unused inodes count MSB */

+	u32int exclude_bitmap_hi;	  /* Exclude bitmap block MSB */

+	u16int block_bitmap_csum_hi; /* crc32c(s_uuid+grp_num+bbitmap) BE */

+	u16int inode_bitmap_csum_hi; /* crc32c(s_uuid+grp_num+ibitmap) BE */

+	u32int reserved;	     /* Padding */

+};

+#pragma pack off

+#define EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE 32

+#define EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE 64

+#define EXT4_MIN_BLOCK_SIZE 1024  /* 1 KiB */

+#define EXT4_MAX_BLOCK_SIZE 65536 /* 64 KiB */

+#define EXT4_REV0_INODE_SIZE 128

+#define EXT4_INODE_BLOCK_SIZE 512

+#define EXT4_INODE_DIRECT_BLOCK_COUNT 12

+#define EXT4_INODE_INDIRECT_BLOCK EXT4_INODE_DIRECT_BLOCK_COUNT

+#define EXT4_INODE_DOUBLE_INDIRECT_BLOCK (EXT4_INODE_INDIRECT_BLOCK + 1)

+#define EXT4_INODE_TRIPPLE_INDIRECT_BLOCK (EXT4_INODE_DOUBLE_INDIRECT_BLOCK + 1)

+#define EXT4_INODE_BLOCKS (EXT4_INODE_TRIPPLE_INDIRECT_BLOCK + 1)

+#define EXT4_INODE_INDIRECT_BLOCK_COUNT                                        \

+	(EXT4_INODE_BLOCKS - EXT4_INODE_DIRECT_BLOCK_COUNT)

+#pragma pack on

+/*

+ * Structure of an inode on the disk

+ */

+struct ext4_inode {

+	u16int mode;		    /* File mode */

+	u16int uid;		    /* Low 16 bits of owner uid */

+	u32int size_lo;	   /* Size in bytes */

+	u32int access_time;       /* Access time */

+	u32int change_inode_time; /* I-node change time */

+	u32int modification_time; /* Modification time */

+	u32int deletion_time;     /* Deletion time */

+	u16int gid;		    /* Low 16 bits of group id */

+	u16int links_count;       /* Links count */

+	u32int blocks_count_lo;   /* Blocks count */

+	u32int flags;		    /* File flags */

+	u32int unused_osd1;       /* OS dependent - not used in HelenOS */

+	u32int blocks[EXT4_INODE_BLOCKS]; /* Pointers to blocks */

+	u32int generation;		    /* File version (for NFS) */

+	u32int file_acl_lo;		    /* File ACL */

+	u32int size_hi;

+	u32int obso_faddr; /* Obsoleted fragment address */

+	union {

+		struct {

+			u16int blocks_high;

+			u16int file_acl_high;

+			u16int uid_high;

+			u16int gid_high;

+			u16int checksum_lo; /* crc32c(uuid+inum+inode) LE */

+			u16int reserved2;

+		} linux2;

+		struct {

+			u16int reserved1;

+			u16int mode_high;

+			u16int uid_high;

+			u16int gid_high;

+			u32int author;

+		} hurd2;

+	} osd2;

+	u16int extra_isize;

+	u16int checksum_hi;	/* crc32c(uuid+inum+inode) BE */

+	u32int ctime_extra; /* Extra change time (nsec << 2 | epoch) */

+	u32int mtime_extra; /* Extra Modification time (nsec << 2 | epoch) */

+	u32int atime_extra; /* Extra Access time (nsec << 2 | epoch) */

+	u32int crtime;      /* File creation time */

+	u32int

+	    crtime_extra;    /* Extra file creation time (nsec << 2 | epoch) */

+	u32int version_hi; /* High 32 bits for 64-bit version */

+};

+#pragma pack off

+#define EXT4_INODE_MODE_FIFO 0x1000

+#define EXT4_INODE_MODE_CHARDEV 0x2000

+#define EXT4_INODE_MODE_DIRECTORY 0x4000

+#define EXT4_INODE_MODE_BLOCKDEV 0x6000

+#define EXT4_INODE_MODE_FILE 0x8000

+#define EXT4_INODE_MODE_SOFTLINK 0xA000

+#define EXT4_INODE_MODE_SOCKET 0xC000

+#define EXT4_INODE_MODE_TYPE_MASK 0xF000

+/*

+ * Inode flags

+ */

+#define EXT4_INODE_FLAG_SECRM 0x00000001     /* Secure deletion */

+#define EXT4_INODE_FLAG_UNRM 0x00000002      /* Undelete */

+#define EXT4_INODE_FLAG_COMPR 0x00000004     /* Compress file */

+#define EXT4_INODE_FLAG_SYNC 0x00000008      /* Synchronous updates */

+#define EXT4_INODE_FLAG_IMMUTABLE 0x00000010 /* Immutable file */

+#define EXT4_INODE_FLAG_APPEND 0x00000020  /* writes to file may only append */

+#define EXT4_INODE_FLAG_NODUMP 0x00000040  /* do not dump file */

+#define EXT4_INODE_FLAG_NOATIME 0x00000080 /* do not update atime */

+/* Compression flags */

+#define EXT4_INODE_FLAG_DIRTY 0x00000100

+#define EXT4_INODE_FLAG_COMPRBLK                                               \

+	0x00000200			   /* One or more compressed clusters */

+#define EXT4_INODE_FLAG_NOCOMPR 0x00000400 /* Don't compress */

+#define EXT4_INODE_FLAG_ECOMPR 0x00000800  /* Compression error */

+#define EXT4_INODE_FLAG_INDEX 0x00001000  /* hash-indexed directory */

+#define EXT4_INODE_FLAG_IMAGIC 0x00002000 /* AFS directory */

+#define EXT4_INODE_FLAG_JOURNAL_DATA                                           \

+	0x00004000			  /* File data should be journaled */

+#define EXT4_INODE_FLAG_NOTAIL 0x00008000 /* File tail should not be merged */

+#define EXT4_INODE_FLAG_DIRSYNC                                                \

+	0x00010000 /* Dirsync behaviour (directories only) */

+#define EXT4_INODE_FLAG_TOPDIR 0x00020000    /* Top of directory hierarchies */

+#define EXT4_INODE_FLAG_HUGE_FILE 0x00040000 /* Set to each huge file */

+#define EXT4_INODE_FLAG_EXTENTS 0x00080000   /* Inode uses extents */

+#define EXT4_INODE_FLAG_EA_INODE 0x00200000  /* Inode used for large EA */

+#define EXT4_INODE_FLAG_EOFBLOCKS 0x00400000 /* Blocks allocated beyond EOF */

+#define EXT4_INODE_FLAG_RESERVED 0x80000000  /* reserved for ext4 lib */

+#define EXT4_INODE_ROOT_INDEX 2

+#define EXT4_DIRECTORY_FILENAME_LEN 255

+/**@brief   Directory entry types. */

+enum { EXT4_DE_UNKNOWN = 0,

+       EXT4_DE_REG_FILE,

+       EXT4_DE_DIR,

+       EXT4_DE_CHRDEV,

+       EXT4_DE_BLKDEV,

+       EXT4_DE_FIFO,

+       EXT4_DE_SOCK,

+       EXT4_DE_SYMLINK };

+#define EXT4_DIRENTRY_DIR_CSUM 0xDE

+#pragma pack on

+union ext4_dir_en_internal {

+	u8int name_length_high; /* Higher 8 bits of name length */

+	u8int inode_type;       /* Type of referenced inode (in rev >= 0.5) */

+};

+/**

+ * Linked list directory entry structure

+ */

+struct ext4_dir_en {

+	u32int inode;	/* I-node for the entry */

+	u16int entry_len; /* Distance to the next directory entry */

+	u8int name_len;   /* Lower 8 bits of name length */

+	union ext4_dir_en_internal in;

+	u8int name[]; /* Entry name */

+};

+/* Structures for indexed directory */

+struct ext4_dir_idx_climit {

+	u16int limit;

+	u16int count;

+};

+struct ext4_dir_idx_dot_en {

+	u32int inode;

+	u16int entry_length;

+	u8int name_length;

+	u8int inode_type;

+	u8int name[4];

+};

+struct ext4_dir_idx_rinfo {

+	u32int reserved_zero;

+	u8int hash_version;

+	u8int info_length;

+	u8int indirect_levels;

+	u8int unused_flags;

+};

+struct ext4_dir_idx_entry {

+	u32int hash;

+	u32int block;

+};

+struct ext4_dir_idx_root {

+	struct ext4_dir_idx_dot_en dots[2];

+	struct ext4_dir_idx_rinfo info;

+	struct ext4_dir_idx_entry en[];

+};

+struct ext4_fake_dir_entry {

+	u32int inode;

+	u16int entry_length;

+	u8int name_length;

+	u8int inode_type;

+};

+struct ext4_dir_idx_node {

+	struct ext4_fake_dir_entry fake;

+	struct ext4_dir_idx_entry entries[];

+};

+/*

+ * This goes at the end of each htree block.

+ */

+struct ext4_dir_idx_tail {

+	u32int reserved;

+	u32int checksum;	/* crc32c(uuid+inum+dirblock) */

+};

+/*

+ * This is a bogus directory entry at the end of each leaf block that

+ * records checksums.

+ */

+struct ext4_dir_entry_tail {

+	u32int reserved_zero1;	/* Pretend to be unused */

+	u16int rec_len;		/* 12 */

+	u8int reserved_zero2;	/* Zero name length */

+	u8int reserved_ft;	/* 0xDE, fake file type */

+	u32int checksum;		/* crc32c(uuid+inum+dirblock) */

+};

+#pragma pack off

+#define EXT4_DIRENT_TAIL(block, blocksize) \

+	((struct ext4_dir_entry_tail *)(((char *)(block)) + ((blocksize) - \

+					sizeof(struct ext4_dir_entry_tail))))

+#define EXT4_ERR_BAD_DX_DIR (-25000)

+#define EXT4_ERR_NOT_FOUND (-25001)

+#define EXT4_LINK_MAX 65000

+#define EXT4_BAD_INO 1

+#define EXT4_ROOT_INO 2

+#define EXT4_BOOT_LOADER_INO 5

+#define EXT4_UNDEL_DIR_INO 6

+#define EXT4_RESIZE_INO 7

+#define EXT4_JOURNAL_INO 8

+#define EXT4_GOOD_OLD_FIRST_INO 11

+#pragma pack on

+/*

+ * This is the extent tail on-disk structure.

+ * All other extent structures are 12 bytes long.  It turns out that

+ * block size % 12 >= 4 for at least all powers of 2 greater than 512, which

+ * covers all valid ext4 block sizes.  Therefore, this tail structure can be

+ * crammed into the end of the block without having to rebalance the tree.

+ */

+struct ext4_extent_tail

+{

+	u32int checksum;	/* crc32c(uuid+inum+extent_block) */

+};

+/*

+ * This is the extent on-disk structure.

+ * It's used at the bottom of the tree.

+ */

+struct ext4_extent {

+	u32int iblock;	/* First logical block extent covers */

+	u16int nblocks;	/* Number of blocks covered by extent */

+	u16int fblock_hi;	/* High 16 bits of physical block */

+	u32int fblock_lo;	/* Low 32 bits of physical block */

+};

+/*

+ * This is index on-disk structure.

+ * It's used at all the levels except the bottom.

+ */

+struct ext4_extent_index {

+	u32int iblock; /* Index covers logical blocks starting at iblock */

+	/**

+	 * Physical block of the next level down (a leaf

+	 * or another index block), stored in two parts:

+	 * fblock_lo = low 32 bits, fblock_hi = high 16 bits.

+	 */

+	u32int fblock_lo;

+	u16int fblock_hi;

+	u16int padding;

+};

+/*

+ * Each block (leaves and indexes), even inode-stored has header.

+ */

+struct ext4_extent_header {

+	u16int magic;

+	u16int nentries;	/* Number of valid entries */

+	u16int max_nentries;	/* Capacity of store in entries */

+	u16int depth;		/* Has tree real underlying blocks? */

+	u32int generation;	/* generation of the tree */

+};

+#pragma pack off

+#define EXT4_EXTENT_MAGIC 0xF30A

+/******************************************************************************/

+/* EXT3 HTree directory indexing */

+#define EXT2_HTREE_LEGACY 0

+#define EXT2_HTREE_HALF_MD4 1

+#define EXT2_HTREE_TEA 2

+#define EXT2_HTREE_LEGACY_UNSIGNED 3

+#define EXT2_HTREE_HALF_MD4_UNSIGNED 4

+#define EXT2_HTREE_TEA_UNSIGNED 5

+#define EXT2_HTREE_EOF 0x7FFFFFFFUL

+#define EXT4_GOOD_OLD_INODE_SIZE	128

+/*****************************************************************************/

+/*

+ * JBD stores integers in big endian.

+ */

+#define JBD_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */

+/*

+ * Descriptor block types:

+ */

+#define JBD_DESCRIPTOR_BLOCK	1

+#define JBD_COMMIT_BLOCK	2

+#define JBD_SUPERBLOCK		3

+#define JBD_SUPERBLOCK_V2	4

+#define JBD_REVOKE_BLOCK	5

+#pragma pack on

+/*

+ * Standard header for all descriptor blocks:

+ */

+struct jbd_bhdr {

+	u32int		magic;

+	u32int		blocktype;

+	u32int		sequence;

+};

+#pragma pack off

+/*

+ * Checksum types.

+ */

+#define JBD_CRC32_CHKSUM   1

+#define JBD_MD5_CHKSUM     2

+#define JBD_SHA1_CHKSUM    3

+#define JBD_CRC32C_CHKSUM  4

+#define JBD_CRC32_CHKSUM_SIZE 4

+#define JBD_CHECKSUM_BYTES (32 / sizeof(u32int))

+#pragma pack on

+/*

+ * Commit block header for storing transactional checksums:

+ *

+ * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum*

+ * fields are used to store a checksum of the descriptor and data blocks.

+ *

+ * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum

+ * field is used to store crc32c(uuid+commit_block).  Each journal metadata

+ * block gets its own checksum, and data block checksums are stored in

+ * journal_block_tag (in the descriptor).  The other h_chksum* fields are

+ * not used.

+ *

+ * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses

+ * journal_block_tag3_t to store a full 32-bit checksum.  Everything else

+ * is the same as v2.

+ *

+ * Checksum v1, v2, and v3 are mutually exclusive features.

+ */

+struct jbd_commit_header {

+	struct jbd_bhdr header;

+	u8int chksum_type;

+	u8int chksum_size;

+	u8int padding[2];

+	u32int		chksum[JBD_CHECKSUM_BYTES];

+	u64int		commit_sec;

+	u32int		commit_nsec;

+};

+/*

+ * The block tag: used to describe a single buffer in the journal

+ */

+struct jbd_block_tag3 {

+	u32int		blocknr;	/* The on-disk block number */

+	u32int		flags;	/* See below */

+	u32int		blocknr_high; /* most-significant high 32bits. */

+	u32int		checksum;	/* crc32c(uuid+seq+block) */

+};

+struct jbd_block_tag {

+	u32int		blocknr;	/* The on-disk block number */

+	u16int		checksum;	/* truncated crc32c(uuid+seq+block) */

+	u16int		flags;	/* See below */

+	u32int		blocknr_high; /* most-significant high 32bits. */

+};

+#pragma pack off

+/* Definitions for the journal tag flags word: */

+#define JBD_FLAG_ESCAPE		1	/* on-disk block is escaped */

+#define JBD_FLAG_SAME_UUID	2	/* block has same uuid as previous */

+#define JBD_FLAG_DELETED	4	/* block deleted by this transaction */

+#define JBD_FLAG_LAST_TAG	8	/* last tag in this descriptor block */

+#pragma pack on

+/* Tail of descriptor block, for checksumming */

+struct jbd_block_tail {

+	u32int	checksum;

+};

+/*

+ * The revoke descriptor: used on disk to describe a series of blocks to

+ * be revoked from the log

+ */

+struct jbd_revoke_header {

+	struct jbd_bhdr  header;

+	u32int	 count;	/* Count of bytes used in the block */

+};

+/* Tail of revoke block, for checksumming */

+struct jbd_revoke_tail {

+	u32int		checksum;

+};

+#pragma pack off

+#define JBD_USERS_MAX 48

+#define JBD_USERS_SIZE (UUID_SIZE * JBD_USERS_MAX)

+#pragma pack on

+/*

+ * The journal superblock.  All fields are in big-endian byte order.

+ */

+struct jbd_sb {

+/* 0x0000 */

+	struct jbd_bhdr header;

+/* 0x000C */

+	/* Static information describing the journal */

+	u32int	blocksize;		/* journal device blocksize */

+	u32int	maxlen;		/* total blocks in journal file */

+	u32int	first;		/* first block of log information */

+/* 0x0018 */

+	/* Dynamic information describing the current state of the log */

+	u32int	sequence;		/* first commit ID expected in log */

+	u32int	start;		/* blocknr of start of log */

+/* 0x0020 */

+	/* Error value, as set by journal_abort(). */

+	s32int 	error_val;

+/* 0x0024 */

+	/* Remaining fields are only valid in a version-2 superblock */

+	u32int	feature_compat; 	/* compatible feature set */

+	u32int	feature_incompat; 	/* incompatible feature set */

+	u32int	feature_ro_compat; 	/* readonly-compatible feature set */

+/* 0x0030 */

+	u8int 	uuid[UUID_SIZE];		/* 128-bit uuid for journal */

+/* 0x0040 */

+	u32int	nr_users;		/* Nr of filesystems sharing log */

+	u32int	dynsuper;		/* Blocknr of dynamic superblock copy*/

+/* 0x0048 */

+	u32int	max_transaction;	/* Limit of journal blocks per trans.*/

+	u32int	max_trandata;	/* Limit of data blocks per trans. */

+/* 0x0050 */

+	u8int 	checksum_type;	/* checksum type */

+	u8int 	padding2[3];

+	u32int	padding[42];

+	u32int	checksum;		/* crc32c(superblock) */

+/* 0x0100 */

+	u8int 	users[JBD_USERS_SIZE];		/* ids of all fs'es sharing the log */

+/* 0x0400 */

+};

+#pragma pack off

+#define JBD_SUPERBLOCK_SIZE sizeof(struct jbd_sb)

+#define JBD_HAS_COMPAT_FEATURE(jsb,mask)					\

+	((jsb)->header.blocktype == to_be32(JBD_SUPERBLOCK_V2) &&		\

+	 ((jsb)->feature_compat & to_be32((mask)))) /* nonzero iff a v2 superblock has a compat feature in mask */

+#define JBD_HAS_RO_COMPAT_FEATURE(jsb,mask)				\

+	((jsb)->header.blocktype == to_be32(JBD_SUPERBLOCK_V2) &&		\

+	 ((jsb)->feature_ro_compat & to_be32((mask)))) /* nonzero iff a v2 superblock has an ro-compat feature in mask */

+#define JBD_HAS_INCOMPAT_FEATURE(jsb,mask)				\

+	((jsb)->header.blocktype == to_be32(JBD_SUPERBLOCK_V2) &&		\

+	 ((jsb)->feature_incompat & to_be32((mask)))) /* nonzero iff a v2 superblock has an incompat feature in mask */

+#define JBD_FEATURE_COMPAT_CHECKSUM	0x00000001

+#define JBD_FEATURE_INCOMPAT_REVOKE		0x00000001

+#define JBD_FEATURE_INCOMPAT_64BIT		0x00000002

+#define JBD_FEATURE_INCOMPAT_ASYNC_COMMIT	0x00000004

+#define JBD_FEATURE_INCOMPAT_CSUM_V2		0x00000008

+#define JBD_FEATURE_INCOMPAT_CSUM_V3		0x00000010

+/* Features known to this implementation: */

+#define JBD_KNOWN_COMPAT_FEATURES	0

+#define JBD_KNOWN_ROCOMPAT_FEATURES	0

+#define JBD_KNOWN_INCOMPAT_FEATURES	(JBD_FEATURE_INCOMPAT_REVOKE|\

+					 JBD_FEATURE_INCOMPAT_ASYNC_COMMIT|\

+					 JBD_FEATURE_INCOMPAT_64BIT|\

+					 JBD_FEATURE_INCOMPAT_CSUM_V2|\

+					 JBD_FEATURE_INCOMPAT_CSUM_V3)

+/*****************************************************************************/

+#define EXT4_CRC32_INIT (0xFFFFFFFFUL)

+/*****************************************************************************/

+#define ext4_malloc  malloc

+#define ext4_calloc  calloc

+#define ext4_realloc realloc

+#define ext4_free    free

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/queue.h

@@ -1,0 +1,612 @@

+/*-

+ * Copyright (c) 1991, 1993

+ *	The Regents of the University of California.  All rights reserved.

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ * 1. Redistributions of source code must retain the above copyright

+ *    notice, this list of conditions and the following disclaimer.

+ * 2. Redistributions in binary form must reproduce the above copyright

+ *    notice, this list of conditions and the following disclaimer in the

+ *    documentation and/or other materials provided with the distribution.

+ * 4. Neither the name of the University nor the names of its contributors

+ *    may be used to endorse or promote products derived from this software

+ *    without specific prior written permission.

+ *

+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

+ * SUCH DAMAGE.

+ *

+ *	@(#)queue.h	8.5 (Berkeley) 8/20/94

+ * $FreeBSD$

+ */

+#pragma once

+/*

+ * This file defines four types of data structures: singly-linked lists,

+ * singly-linked tail queues, lists and tail queues.

+ *

+ * A singly-linked list is headed by a single forward pointer. The elements

+ * are singly linked for minimum space and pointer manipulation overhead at

+ * the expense of O(n) removal for arbitrary elements. New elements can be

+ * added to the list after an existing element or at the head of the list.

+ * Elements being removed from the head of the list should use the explicit

+ * macro for this purpose for optimum efficiency. A singly-linked list may

+ * only be traversed in the forward direction.  Singly-linked lists are ideal

+ * for applications with large datasets and few or no removals or for

+ * implementing a LIFO queue.

+ *

+ * A singly-linked tail queue is headed by a pair of pointers, one to the

+ * head of the list and the other to the tail of the list. The elements are

+ * singly linked for minimum space and pointer manipulation overhead at the

+ * expense of O(n) removal for arbitrary elements. New elements can be added

+ * to the list after an existing element, at the head of the list, or at the

+ * end of the list. Elements being removed from the head of the tail queue

+ * should use the explicit macro for this purpose for optimum efficiency.

+ * A singly-linked tail queue may only be traversed in the forward direction.

+ * Singly-linked tail queues are ideal for applications with large datasets

+ * and few or no removals or for implementing a FIFO queue.

+ *

+ * A list is headed by a single forward pointer (or an array of forward

+ * pointers for a hash table header). The elements are doubly linked

+ * so that an arbitrary element can be removed without a need to

+ * traverse the list. New elements can be added to the list before

+ * or after an existing element or at the head of the list. A list

+ * may be traversed in either direction.

+ *

+ * A tail queue is headed by a pair of pointers, one to the head of the

+ * list and the other to the tail of the list. The elements are doubly

+ * linked so that an arbitrary element can be removed without a need to

+ * traverse the list. New elements can be added to the list before or

+ * after an existing element, at the head of the list, or at the end of

+ * the list. A tail queue may be traversed in either direction.

+ *

+ * For details on the use of these macros, see the queue(3) manual page.

+ *

+ *

+ *				SLIST	LIST	STAILQ	TAILQ

+ * _HEAD			+	+	+	+

+ * _HEAD_INITIALIZER		+	+	+	+

+ * _ENTRY			+	+	+	+

+ * _INIT			+	+	+	+

+ * _EMPTY			+	+	+	+

+ * _FIRST			+	+	+	+

+ * _NEXT			+	+	+	+

+ * _PREV			-	+	-	+

+ * _LAST			-	-	+	+

+ * _FOREACH			+	+	+	+

+ * _FOREACH_FROM		+	+	+	+

+ * _FOREACH_SAFE		+	+	+	+

+ * _FOREACH_FROM_SAFE		+	+	+	+

+ * _FOREACH_REVERSE		-	-	-	+

+ * _FOREACH_REVERSE_FROM	-	-	-	+

+ * _FOREACH_REVERSE_SAFE	-	-	-	+

+ * _FOREACH_REVERSE_FROM_SAFE	-	-	-	+

+ * _INSERT_HEAD			+	+	+	+

+ * _INSERT_BEFORE		-	+	-	+

+ * _INSERT_AFTER		+	+	+	+

+ * _INSERT_TAIL			-	-	+	+

+ * _CONCAT			-	-	+	+

+ * _REMOVE_AFTER		+	-	+	-

+ * _REMOVE_HEAD			+	-	+	-

+ * _REMOVE			+	+	+	+

+ * _SWAP			+	+	+	+

+ *

+ */

+/*

+ * Debugging and tracing hooks.  All of them are defined empty in this

+ * file, so every QMD_*, TRACEBUF* and TRASHIT use in the macros below

+ * expands to nothing.

+ */

+#define	QMD_TRACE_ELEM(elem)

+#define	QMD_TRACE_HEAD(head)

+#define	QMD_SAVELINK(name, link)

+#define	TRACEBUF

+#define	TRACEBUF_INITIALIZER

+#define	TRASHIT(x)

+/*

+ * Singly-linked List declarations.

+ */

+#define	SLIST_HEAD(name, type)						\

+struct name {								\

+	struct type *slh_first;	/* first element */			\

+}

+#define	SLIST_HEAD_INITIALIZER(head)					\

+	{ nil }

+#define	SLIST_ENTRY(type)						\

+struct {								\

+	struct type *sle_next;	/* next element */			\

+}

+/*

+ * Singly-linked List functions.

+ */

+#define	SLIST_EMPTY(head)	((head)->slh_first == nil)

+#define	SLIST_FIRST(head)	((head)->slh_first)

+#define	SLIST_FOREACH(var, head, field)					\

+	for ((var) = SLIST_FIRST((head));				\

+	    (var);							\

+	    (var) = SLIST_NEXT((var), field))

+#define	SLIST_FOREACH_FROM(var, head, field)				\

+	for ((var) = ((var) ? (var) : SLIST_FIRST((head)));		\

+	    (var);							\

+	    (var) = SLIST_NEXT((var), field))

+/*

+ * Forward traversal that tolerates unlinking or freeing var inside the

+ * loop body: the successor is stored in tvar before the body runs, and

+ * var is advanced from tvar rather than from var's own link.

+ */

+#define	SLIST_FOREACH_SAFE(var, head, field, tvar)			\

+	for ((var) = SLIST_FIRST((head));				\

+	    (var) && ((tvar) = SLIST_NEXT((var), field), 1);		\

+	    (var) = (tvar))

+#define	SLIST_FOREACH_FROM_SAFE(var, head, field, tvar)			\

+	for ((var) = ((var) ? (var) : SLIST_FIRST((head)));		\

+	    (var) && ((tvar) = SLIST_NEXT((var), field), 1);		\

+	    (var) = (tvar))

+/*

+ * Forward traversal that also keeps varp pointing at the link which

+ * references var: first the head's slh_first, then the sle_next of

+ * each element already visited.

+ */

+#define	SLIST_FOREACH_PREVPTR(var, varp, head, field)			\

+	for ((varp) = &SLIST_FIRST((head));				\

+	    ((var) = *(varp)) != nil;					\

+	    (varp) = &SLIST_NEXT((var), field))

+#define	SLIST_INIT(head) do {						\

+	SLIST_FIRST((head)) = nil;					\

+} while (0)

+#define	SLIST_INSERT_AFTER(slistelm, elm, field) do {			\

+	SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field);	\

+	SLIST_NEXT((slistelm), field) = (elm);				\

+} while (0)

+#define	SLIST_INSERT_HEAD(head, elm, field) do {			\

+	SLIST_NEXT((elm), field) = SLIST_FIRST((head));			\

+	SLIST_FIRST((head)) = (elm);					\

+} while (0)

+#define	SLIST_NEXT(elm, field)	((elm)->field.sle_next)

+/*

+ * Unlink elm from the list.  type is the struct tag of the elements and

+ * is used to declare the cursor.  Unless elm is the first element, the

+ * list is walked from the head to find elm's predecessor.  That walk

+ * has no end-of-list check, so elm must actually be on the list.

+ */

+#define	SLIST_REMOVE(head, elm, type, field) do {			\

+	QMD_SAVELINK(oldnext, (elm)->field.sle_next);			\

+	if (SLIST_FIRST((head)) == (elm)) {				\

+		SLIST_REMOVE_HEAD((head), field);			\

+	}								\

+	else {								\

+		struct type *curelm = SLIST_FIRST((head));		\

+		while (SLIST_NEXT(curelm, field) != (elm))		\

+			curelm = SLIST_NEXT(curelm, field);		\

+		SLIST_REMOVE_AFTER(curelm, field);			\

+	}								\

+	TRASHIT(*oldnext);						\

+} while (0)

+#define SLIST_REMOVE_AFTER(elm, field) do {				\

+	SLIST_NEXT(elm, field) =					\

+	    SLIST_NEXT(SLIST_NEXT(elm, field), field);			\

+} while (0)

+#define	SLIST_REMOVE_HEAD(head, field) do {				\

+	SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field);	\

+} while (0)

+#define SLIST_SWAP(head1, head2, type) do {				\

+	struct type *swap_first = SLIST_FIRST(head1);			\

+	SLIST_FIRST(head1) = SLIST_FIRST(head2);			\

+	SLIST_FIRST(head2) = swap_first;				\

+} while (0)

+/*

+ * Singly-linked Tail queue declarations.

+ */

+#define	STAILQ_HEAD(name, type)						\

+struct name {								\

+	struct type *stqh_first;/* first element */			\

+	struct type **stqh_last;/* addr of last next element */		\

+}

+#define	STAILQ_HEAD_INITIALIZER(head)					\

+	{ nil, &(head).stqh_first }

+#define	STAILQ_ENTRY(type)						\

+struct {								\

+	struct type *stqe_next;	/* next element */			\

+}

+/*

+ * Singly-linked Tail queue functions.

+ */

+/*

+ * Append every element of head2 to the end of head1 and reinitialize

+ * head2 as empty.  Does nothing when head2 is already empty.

+ */

+#define	STAILQ_CONCAT(head1, head2) do {				\

+	if (!STAILQ_EMPTY((head2))) {					\

+		*(head1)->stqh_last = (head2)->stqh_first;		\

+		(head1)->stqh_last = (head2)->stqh_last;		\

+		STAILQ_INIT((head2));					\

+	}								\

+} while (0)

+#define	STAILQ_EMPTY(head)	((head)->stqh_first == nil)

+#define	STAILQ_FIRST(head)	((head)->stqh_first)

+#define	STAILQ_FOREACH(var, head, field)				\

+	for((var) = STAILQ_FIRST((head));				\

+	   (var);							\

+	   (var) = STAILQ_NEXT((var), field))

+#define	STAILQ_FOREACH_FROM(var, head, field)				\

+	for ((var) = ((var) ? (var) : STAILQ_FIRST((head)));		\

+	   (var);							\

+	   (var) = STAILQ_NEXT((var), field))

+#define	STAILQ_FOREACH_SAFE(var, head, field, tvar)			\

+	for ((var) = STAILQ_FIRST((head));				\

+	    (var) && ((tvar) = STAILQ_NEXT((var), field), 1);		\

+	    (var) = (tvar))

+#define	STAILQ_FOREACH_FROM_SAFE(var, head, field, tvar)		\

+	for ((var) = ((var) ? (var) : STAILQ_FIRST((head)));		\

+	    (var) && ((tvar) = STAILQ_NEXT((var), field), 1);		\

+	    (var) = (tvar))

+#define	STAILQ_INIT(head) do {						\

+	STAILQ_FIRST((head)) = nil;					\

+	(head)->stqh_last = &STAILQ_FIRST((head));			\

+} while (0)

+#define	STAILQ_INSERT_AFTER(head, tqelm, elm, field) do {		\

+	if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == nil)\

+		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\

+	STAILQ_NEXT((tqelm), field) = (elm);				\

+} while (0)

+#define	STAILQ_INSERT_HEAD(head, elm, field) do {			\

+	if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == nil)	\

+		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\

+	STAILQ_FIRST((head)) = (elm);					\

+} while (0)

+#define	STAILQ_INSERT_TAIL(head, elm, field) do {			\

+	STAILQ_NEXT((elm), field) = nil;				\

+	*(head)->stqh_last = (elm);					\

+	(head)->stqh_last = &STAILQ_NEXT((elm), field);			\

+} while (0)

+/*

+ * Last element of the tail queue, or nil when it is empty.  stqh_last

+ * points at the stqe_next member of the last element, so the element

+ * itself is recovered from that member address with __containerof.

+ * NOTE(review): __containerof is not defined in this header; it has to

+ * be provided elsewhere before this macro can be used - confirm.

+ */

+#define	STAILQ_LAST(head, type, field)					\

+	(STAILQ_EMPTY((head)) ? nil :					\

+	    __containerof((head)->stqh_last, struct type, field.stqe_next))

+#define	STAILQ_NEXT(elm, field)	((elm)->field.stqe_next)

+#define	STAILQ_REMOVE(head, elm, type, field) do {			\

+	QMD_SAVELINK(oldnext, (elm)->field.stqe_next);			\

+	if (STAILQ_FIRST((head)) == (elm)) {				\

+		STAILQ_REMOVE_HEAD((head), field);			\

+	}								\

+	else {								\

+		struct type *curelm = STAILQ_FIRST((head));		\

+		while (STAILQ_NEXT(curelm, field) != (elm))		\

+			curelm = STAILQ_NEXT(curelm, field);		\

+		STAILQ_REMOVE_AFTER(head, curelm, field);		\

+	}								\

+	TRASHIT(*oldnext);						\

+} while (0)

+#define STAILQ_REMOVE_AFTER(head, elm, field) do {			\

+	if ((STAILQ_NEXT(elm, field) =					\

+	     STAILQ_NEXT(STAILQ_NEXT(elm, field), field)) == nil)	\

+		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\

+} while (0)

+#define	STAILQ_REMOVE_HEAD(head, field) do {				\

+	if ((STAILQ_FIRST((head)) =					\

+	     STAILQ_NEXT(STAILQ_FIRST((head)), field)) == nil)		\

+		(head)->stqh_last = &STAILQ_FIRST((head));		\

+} while (0)

+#define STAILQ_SWAP(head1, head2, type) do {				\

+	struct type *swap_first = STAILQ_FIRST(head1);			\

+	struct type **swap_last = (head1)->stqh_last;			\

+	STAILQ_FIRST(head1) = STAILQ_FIRST(head2);			\

+	(head1)->stqh_last = (head2)->stqh_last;			\

+	STAILQ_FIRST(head2) = swap_first;				\

+	(head2)->stqh_last = swap_last;					\

+	if (STAILQ_EMPTY(head1))					\

+		(head1)->stqh_last = &STAILQ_FIRST(head1);		\

+	if (STAILQ_EMPTY(head2))					\

+		(head2)->stqh_last = &STAILQ_FIRST(head2);		\

+} while (0)

+/*

+ * List declarations.

+ */

+#define	LIST_HEAD(name, type)						\

+struct name {								\

+	struct type *lh_first;	/* first element */			\

+}

+#define	LIST_HEAD_INITIALIZER(head)					\

+	{ nil }

+#define	LIST_ENTRY(type)						\

+struct {								\

+	struct type *le_next;	/* next element */			\

+	struct type **le_prev;	/* address of previous next element */	\

+}

+/*

+ * List functions.

+ */

+#define	QMD_LIST_CHECK_HEAD(head, field)

+#define	QMD_LIST_CHECK_NEXT(elm, field)

+#define	QMD_LIST_CHECK_PREV(elm, field)

+#define	LIST_EMPTY(head)	((head)->lh_first == nil)

+#define	LIST_FIRST(head)	((head)->lh_first)

+#define	LIST_FOREACH(var, head, field)					\

+	for ((var) = LIST_FIRST((head));				\

+	    (var);							\

+	    (var) = LIST_NEXT((var), field))

+#define	LIST_FOREACH_FROM(var, head, field)				\

+	for ((var) = ((var) ? (var) : LIST_FIRST((head)));		\

+	    (var);							\

+	    (var) = LIST_NEXT((var), field))

+#define	LIST_FOREACH_SAFE(var, head, field, tvar)			\

+	for ((var) = LIST_FIRST((head));				\

+	    (var) && ((tvar) = LIST_NEXT((var), field), 1);		\

+	    (var) = (tvar))

+#define	LIST_FOREACH_FROM_SAFE(var, head, field, tvar)			\

+	for ((var) = ((var) ? (var) : LIST_FIRST((head)));		\

+	    (var) && ((tvar) = LIST_NEXT((var), field), 1);		\

+	    (var) = (tvar))

+#define	LIST_INIT(head) do {						\

+	LIST_FIRST((head)) = nil;					\

+} while (0)

+#define	LIST_INSERT_AFTER(listelm, elm, field) do {			\

+	QMD_LIST_CHECK_NEXT(listelm, field);				\

+	if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != nil)\

+		LIST_NEXT((listelm), field)->field.le_prev =		\

+		    &LIST_NEXT((elm), field);				\

+	LIST_NEXT((listelm), field) = (elm);				\

+	(elm)->field.le_prev = &LIST_NEXT((listelm), field);		\

+} while (0)

+#define	LIST_INSERT_BEFORE(listelm, elm, field) do {			\

+	QMD_LIST_CHECK_PREV(listelm, field);				\

+	(elm)->field.le_prev = (listelm)->field.le_prev;		\

+	LIST_NEXT((elm), field) = (listelm);				\

+	*(listelm)->field.le_prev = (elm);				\

+	(listelm)->field.le_prev = &LIST_NEXT((elm), field);		\

+} while (0)

+#define	LIST_INSERT_HEAD(head, elm, field) do {				\

+	QMD_LIST_CHECK_HEAD((head), field);				\

+	if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != nil)	\

+		LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\

+	LIST_FIRST((head)) = (elm);					\

+	(elm)->field.le_prev = &LIST_FIRST((head));			\

+} while (0)

+#define	LIST_NEXT(elm, field)	((elm)->field.le_next)

+/*

+ * Element preceding elm, or nil when elm is the first element (its

+ * le_prev then points at the head's lh_first).  Otherwise le_prev is

+ * the address of the predecessor's le_next, from which the predecessor

+ * is recovered with __containerof.

+ * NOTE(review): __containerof is not defined in this header; it has to

+ * be provided elsewhere before this macro can be used - confirm.

+ */

+#define	LIST_PREV(elm, head, type, field)				\

+	((elm)->field.le_prev == &LIST_FIRST((head)) ? nil :		\

+	    __containerof((elm)->field.le_prev, struct type, field.le_next))

+/*

+ * Unlink elm without needing the list head: the successor's le_prev

+ * (if there is a successor) and the link that referenced elm, reached

+ * through elm's le_prev, are patched directly.  elm's own le_next and

+ * le_prev are left as they were, since TRASHIT expands to nothing here.

+ */

+#define	LIST_REMOVE(elm, field) do {					\

+	QMD_SAVELINK(oldnext, (elm)->field.le_next);			\

+	QMD_SAVELINK(oldprev, (elm)->field.le_prev);			\

+	QMD_LIST_CHECK_NEXT(elm, field);				\

+	QMD_LIST_CHECK_PREV(elm, field);				\

+	if (LIST_NEXT((elm), field) != nil)				\

+		LIST_NEXT((elm), field)->field.le_prev = 		\

+		    (elm)->field.le_prev;				\

+	*(elm)->field.le_prev = LIST_NEXT((elm), field);		\

+	TRASHIT(*oldnext);						\

+	TRASHIT(*oldprev);						\

+} while (0)

+#define LIST_SWAP(head1, head2, type, field) do {			\

+	struct type *swap_tmp = LIST_FIRST((head1));			\

+	LIST_FIRST((head1)) = LIST_FIRST((head2));			\

+	LIST_FIRST((head2)) = swap_tmp;					\

+	if ((swap_tmp = LIST_FIRST((head1))) != nil)			\

+		swap_tmp->field.le_prev = &LIST_FIRST((head1));		\

+	if ((swap_tmp = LIST_FIRST((head2))) != nil)			\

+		swap_tmp->field.le_prev = &LIST_FIRST((head2));		\

+} while (0)

+/*

+ * Tail queue declarations.

+ */

+#define	TAILQ_HEAD(name, type)						\

+struct name {								\

+	struct type *tqh_first;	/* first element */			\

+	struct type **tqh_last;	/* addr of last next element */		\

+	TRACEBUF							\

+}

+#define	TAILQ_HEAD_INITIALIZER(head)					\

+	{ nil, &(head).tqh_first, TRACEBUF_INITIALIZER }

+#define	TAILQ_ENTRY(type)						\

+struct {								\

+	struct type *tqe_next;	/* next element */			\

+	struct type **tqe_prev;	/* address of previous next element */	\

+	TRACEBUF							\

+}

+/*

+ * Tail queue functions.

+ */

+#define	QMD_TAILQ_CHECK_HEAD(head, field)

+#define	QMD_TAILQ_CHECK_TAIL(head, headname)

+#define	QMD_TAILQ_CHECK_NEXT(elm, field)

+#define	QMD_TAILQ_CHECK_PREV(elm, field)

+#define	TAILQ_CONCAT(head1, head2, field) do {				\

+	if (!TAILQ_EMPTY(head2)) {					\

+		*(head1)->tqh_last = (head2)->tqh_first;		\

+		(head2)->tqh_first->field.tqe_prev = (head1)->tqh_last;	\

+		(head1)->tqh_last = (head2)->tqh_last;			\

+		TAILQ_INIT((head2));					\

+		QMD_TRACE_HEAD(head1);					\

+		QMD_TRACE_HEAD(head2);					\

+	}								\

+} while (0)

+#define	TAILQ_EMPTY(head)	((head)->tqh_first == nil)

+#define	TAILQ_FIRST(head)	((head)->tqh_first)

+#define	TAILQ_FOREACH(var, head, field)					\

+	for ((var) = TAILQ_FIRST((head));				\

+	    (var);							\

+	    (var) = TAILQ_NEXT((var), field))

+#define	TAILQ_FOREACH_FROM(var, head, field)				\

+	for ((var) = ((var) ? (var) : TAILQ_FIRST((head)));		\

+	    (var);							\

+	    (var) = TAILQ_NEXT((var), field))

+#define	TAILQ_FOREACH_SAFE(var, head, field, tvar)			\

+	for ((var) = TAILQ_FIRST((head));				\

+	    (var) && ((tvar) = TAILQ_NEXT((var), field), 1);		\

+	    (var) = (tvar))

+#define	TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar)			\

+	for ((var) = ((var) ? (var) : TAILQ_FIRST((head)));		\

+	    (var) && ((tvar) = TAILQ_NEXT((var), field), 1);		\

+	    (var) = (tvar))

+#define	TAILQ_FOREACH_REVERSE(var, head, headname, field)		\

+	for ((var) = TAILQ_LAST((head), headname);			\

+	    (var);							\

+	    (var) = TAILQ_PREV((var), headname, field))

+#define	TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field)		\

+	for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname));	\

+	    (var);							\

+	    (var) = TAILQ_PREV((var), headname, field))

+#define	TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar)	\

+	for ((var) = TAILQ_LAST((head), headname);			\

+	    (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1);	\

+	    (var) = (tvar))

+#define	TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \

+	for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname));	\

+	    (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1);	\

+	    (var) = (tvar))

+#define	TAILQ_INIT(head) do {						\

+	TAILQ_FIRST((head)) = nil;					\

+	(head)->tqh_last = &TAILQ_FIRST((head));			\

+	QMD_TRACE_HEAD(head);						\

+} while (0)

+#define	TAILQ_INSERT_AFTER(head, listelm, elm, field) do {		\

+	QMD_TAILQ_CHECK_NEXT(listelm, field);				\

+	if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != nil)\

+		TAILQ_NEXT((elm), field)->field.tqe_prev = 		\

+		    &TAILQ_NEXT((elm), field);				\

+	else {								\

+		(head)->tqh_last = &TAILQ_NEXT((elm), field);		\

+		QMD_TRACE_HEAD(head);					\

+	}								\

+	TAILQ_NEXT((listelm), field) = (elm);				\

+	(elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field);		\

+	QMD_TRACE_ELEM(&(elm)->field);					\

+	QMD_TRACE_ELEM(&(listelm)->field);				\

+} while (0)

+#define	TAILQ_INSERT_BEFORE(listelm, elm, field) do {			\

+	QMD_TAILQ_CHECK_PREV(listelm, field);				\

+	(elm)->field.tqe_prev = (listelm)->field.tqe_prev;		\

+	TAILQ_NEXT((elm), field) = (listelm);				\

+	*(listelm)->field.tqe_prev = (elm);				\

+	(listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field);		\

+	QMD_TRACE_ELEM(&(elm)->field);					\

+	QMD_TRACE_ELEM(&(listelm)->field);				\

+} while (0)

+#define	TAILQ_INSERT_HEAD(head, elm, field) do {			\

+	QMD_TAILQ_CHECK_HEAD(head, field);				\

+	if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != nil)	\

+		TAILQ_FIRST((head))->field.tqe_prev =			\

+		    &TAILQ_NEXT((elm), field);				\

+	else								\

+		(head)->tqh_last = &TAILQ_NEXT((elm), field);		\

+	TAILQ_FIRST((head)) = (elm);					\

+	(elm)->field.tqe_prev = &TAILQ_FIRST((head));			\

+	QMD_TRACE_HEAD(head);						\

+	QMD_TRACE_ELEM(&(elm)->field);					\

+} while (0)

+#define	TAILQ_INSERT_TAIL(head, elm, field) do {			\

+	QMD_TAILQ_CHECK_TAIL(head, field);				\

+	TAILQ_NEXT((elm), field) = nil;				\

+	(elm)->field.tqe_prev = (head)->tqh_last;			\

+	*(head)->tqh_last = (elm);					\

+	(head)->tqh_last = &TAILQ_NEXT((elm), field);			\

+	QMD_TRACE_HEAD(head);						\

+	QMD_TRACE_ELEM(&(elm)->field);					\

+} while (0)

+/*

+ * Last element of the queue, or nil when it is empty.  headname is the

+ * struct tag given to TAILQ_HEAD.  tqh_last holds the address of the

+ * last entry's tqe_next (or of tqh_first when empty); that address is

+ * reinterpreted as a struct headname so the following pointer - the

+ * entry's tqe_prev - can be read and dereferenced.  This relies on

+ * TAILQ_HEAD and TAILQ_ENTRY both starting with the same two pointers.

+ */

+#define	TAILQ_LAST(head, headname)					\

+	(*(((struct headname *)((head)->tqh_last))->tqh_last))

+#define	TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)

+/*

+ * Element preceding elm, or nil when elm is the first element.  Uses

+ * the same cast as TAILQ_LAST: elm's tqe_prev (the address of the

+ * predecessor's tqe_next, or of the head's tqh_first) is viewed as a

+ * struct headname to reach the pointer stored right after it.

+ */

+#define	TAILQ_PREV(elm, headname, field)				\

+	(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))

+#define	TAILQ_REMOVE(head, elm, field) do {				\

+	QMD_SAVELINK(oldnext, (elm)->field.tqe_next);			\

+	QMD_SAVELINK(oldprev, (elm)->field.tqe_prev);			\

+	QMD_TAILQ_CHECK_NEXT(elm, field);				\

+	QMD_TAILQ_CHECK_PREV(elm, field);				\

+	if ((TAILQ_NEXT((elm), field)) != nil)				\

+		TAILQ_NEXT((elm), field)->field.tqe_prev = 		\

+		    (elm)->field.tqe_prev;				\

+	else {								\

+		(head)->tqh_last = (elm)->field.tqe_prev;		\

+		QMD_TRACE_HEAD(head);					\

+	}								\

+	*(elm)->field.tqe_prev = TAILQ_NEXT((elm), field);		\

+	TRASHIT(*oldnext);						\

+	TRASHIT(*oldprev);						\

+	QMD_TRACE_ELEM(&(elm)->field);					\

+} while (0)

+#define TAILQ_SWAP(head1, head2, type, field) do {			\

+	struct type *swap_first = (head1)->tqh_first;			\

+	struct type **swap_last = (head1)->tqh_last;			\

+	(head1)->tqh_first = (head2)->tqh_first;			\

+	(head1)->tqh_last = (head2)->tqh_last;				\

+	(head2)->tqh_first = swap_first;				\

+	(head2)->tqh_last = swap_last;					\

+	if ((swap_first = (head1)->tqh_first) != nil)			\

+		swap_first->field.tqe_prev = &(head1)->tqh_first;	\

+	else								\

+		(head1)->tqh_last = &(head1)->tqh_first;		\

+	if ((swap_first = (head2)->tqh_first) != nil)			\

+		swap_first->field.tqe_prev = &(head2)->tqh_first;	\

+	else								\

+		(head2)->tqh_last = &(head2)->tqh_first;		\

+} while (0)

--- /dev/null

+++ b/sys/src/cmd/ext4srv/include/tree.h

@@ -1,0 +1,796 @@

+/*	$NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $	*/

+/*	$OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $	*/

+/* $FreeBSD$ */

+/*-

+ * Copyright 2002 Niels Provos <[email protected]>

+ * All rights reserved.

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ * 1. Redistributions of source code must retain the above copyright

+ *    notice, this list of conditions and the following disclaimer.

+ * 2. Redistributions in binary form must reproduce the above copyright

+ *    notice, this list of conditions and the following disclaimer in the

+ *    documentation and/or other materials provided with the distribution.

+ *

+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR

+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,

+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF

+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ */

+#pragma once

+/*

+ * This file defines data structures for different types of trees:

+ * splay trees and red-black trees.

+ *

+ * A splay tree is a self-organizing data structure.  Every operation

+ * on the tree causes a splay to happen.  The splay moves the requested

+ * node to the root of the tree and partly rebalances it.

+ *

+ * This has the benefit that request locality causes faster lookups as

+ * the requested nodes move to the top of the tree.  On the other hand,

+ * every lookup causes memory writes.

+ *

+ * The Balance Theorem bounds the total access time for m operations

+ * and n inserts on an initially empty tree as O((m + n)lg n).  The

+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n).

+ *

+ * A red-black tree is a binary search tree with the node color as an

+ * extra attribute.  It fulfills a set of conditions:

+ *	- every search path from the root to a leaf consists of the

+ *	  same number of black nodes,

+ *	- each red node (except for the root) has a black parent,

+ *	- each leaf node is black.

+ *

+ * Every operation on a red-black tree is bounded as O(lg n).

+ * The maximum height of a red-black tree is 2lg (n+1).

+ */

+#define SPLAY_HEAD(name, type)						\

+struct name {								\

+	struct type *sph_root; /* root of the tree */			\

+}

+#define SPLAY_INITIALIZER(root)						\

+	{ nil }

+#define SPLAY_INIT(root) do {						\

+	(root)->sph_root = nil;					\

+} while (/*CONSTCOND*/ 0)

+#define SPLAY_ENTRY(type)						\

+struct {								\

+	struct type *spe_left; /* left element */			\

+	struct type *spe_right; /* right element */			\

+}

+#define SPLAY_LEFT(elm, field)		(elm)->field.spe_left

+#define SPLAY_RIGHT(elm, field)		(elm)->field.spe_right

+#define SPLAY_ROOT(head)		(head)->sph_root

+#define SPLAY_EMPTY(head)		(SPLAY_ROOT(head) == nil)

+/* SPLAY_ROTATE_{LEFT,RIGHT} expect tmp to hold SPLAY_{RIGHT,LEFT} of the current root */

+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do {			\

+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field);	\

+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\

+	(head)->sph_root = tmp;						\

+} while (/*CONSTCOND*/ 0)

+#define SPLAY_ROTATE_LEFT(head, tmp, field) do {			\

+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field);	\

+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\

+	(head)->sph_root = tmp;						\

+} while (/*CONSTCOND*/ 0)

+#define SPLAY_LINKLEFT(head, tmp, field) do {				\

+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\

+	tmp = (head)->sph_root;						\

+	(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);		\

+} while (/*CONSTCOND*/ 0)

+#define SPLAY_LINKRIGHT(head, tmp, field) do {				\

+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\

+	tmp = (head)->sph_root;						\

+	(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);	\

+} while (/*CONSTCOND*/ 0)

+#define SPLAY_ASSEMBLE(head, node, left, right, field) do {		\

+	SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field);	\

+	SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\

+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field);	\

+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field);	\

+} while (/*CONSTCOND*/ 0)

+/* Generates prototypes and inline functions */

+#define SPLAY_PROTOTYPE(name, type, field, cmp)				\

+void name##_SPLAY(struct name *, struct type *);			\

+void name##_SPLAY_MINMAX(struct name *, int);				\

+struct type *name##_SPLAY_INSERT(struct name *, struct type *);		\

+struct type *name##_SPLAY_REMOVE(struct name *, struct type *);		\

+									\

+/* Finds the node with the same key as elm */				\

+static __inline struct type *						\

+name##_SPLAY_FIND(struct name *head, struct type *elm)			\

+{									\

+	if (SPLAY_EMPTY(head))						\

+		return(nil);						\

+	name##_SPLAY(head, elm);					\

+	if ((cmp)(elm, (head)->sph_root) == 0)				\

+		return (head->sph_root);				\

+	return (nil);							\

+}									\

+									\

+static __inline struct type *						\

+name##_SPLAY_NEXT(struct name *head, struct type *elm)			\

+{									\

+	name##_SPLAY(head, elm);					\

+	if (SPLAY_RIGHT(elm, field) != nil) {				\

+		elm = SPLAY_RIGHT(elm, field);				\

+		while (SPLAY_LEFT(elm, field) != nil) {		\

+			elm = SPLAY_LEFT(elm, field);			\

+		}							\

+	} else								\

+		elm = nil;						\

+	return (elm);							\

+}									\

+									\

+static __inline struct type *						\

+name##_SPLAY_MIN_MAX(struct name *head, int val)			\

+{									\

+	name##_SPLAY_MINMAX(head, val);					\

+        return (SPLAY_ROOT(head));					\

+}

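+/*

+ * Generates the functions declared by SPLAY_PROTOTYPE:

+ * name_SPLAY_INSERT, name_SPLAY_REMOVE, name_SPLAY and

+ * name_SPLAY_MINMAX.  name_SPLAY is the main splay operation;

+ * it moves the node close to the key of elm to the top.

+ */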

+#define SPLAY_GENERATE(name, type, field, cmp)				\

+struct type *								\

+name##_SPLAY_INSERT(struct name *head, struct type *elm)		\

+{									\

+    if (SPLAY_EMPTY(head)) {						\

+	    SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = nil;	\

+    } else {								\

+	    int __comp;							\

+	    name##_SPLAY(head, elm);					\

+	    __comp = (cmp)(elm, (head)->sph_root);			\

+	    if(__comp < 0) {						\

+		    SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\

+		    SPLAY_RIGHT(elm, field) = (head)->sph_root;		\

+		    SPLAY_LEFT((head)->sph_root, field) = nil;		\

+	    } else if (__comp > 0) {					\

+		    SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\

+		    SPLAY_LEFT(elm, field) = (head)->sph_root;		\

+		    SPLAY_RIGHT((head)->sph_root, field) = nil;	\

+	    } else							\

+		    return ((head)->sph_root);				\

+    }									\

+    (head)->sph_root = (elm);						\

+    return (nil);							\

+}									\

+									\

+struct type *								\

+name##_SPLAY_REMOVE(struct name *head, struct type *elm)		\

+{									\

+	struct type *__tmp;						\

+	if (SPLAY_EMPTY(head))						\

+		return (nil);						\

+	name##_SPLAY(head, elm);					\

+	if ((cmp)(elm, (head)->sph_root) == 0) {			\

+		if (SPLAY_LEFT((head)->sph_root, field) == nil) {	\

+			(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\

+		} else {						\

+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\

+			(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\

+			name##_SPLAY(head, elm);			\

+			SPLAY_RIGHT((head)->sph_root, field) = __tmp;	\

+		}							\

+		return (elm);						\

+	}								\

+	return (nil);							\

+}									\

+									\

+void									\

+name##_SPLAY(struct name *head, struct type *elm)			\

+{									\

+	struct type __node, *__left, *__right, *__tmp;			\

+	int __comp;							\

+\

+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = nil;\

+	__left = __right = &__node;					\

+\

+	while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) {		\

+		if (__comp < 0) {					\

+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\

+			if (__tmp == nil)				\

+				break;					\

+			if ((cmp)(elm, __tmp) < 0){			\

+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\

+				if (SPLAY_LEFT((head)->sph_root, field) == nil)\

+					break;				\

+			}						\

+			SPLAY_LINKLEFT(head, __right, field);		\

+		} else if (__comp > 0) {				\

+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\

+			if (__tmp == nil)				\

+				break;					\

+			if ((cmp)(elm, __tmp) > 0){			\

+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\

+				if (SPLAY_RIGHT((head)->sph_root, field) == nil)\

+					break;				\

+			}						\

+			SPLAY_LINKRIGHT(head, __left, field);		\

+		}							\

+	}								\

+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\

+}									\

+									\

+/* Splay with either the minimum or the maximum element			\

+ * Used to find minimum or maximum element in tree.			\

+ */									\

+void name##_SPLAY_MINMAX(struct name *head, int __comp) \

+{									\

+	struct type __node, *__left, *__right, *__tmp;			\

+\

+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = nil;\

+	__left = __right = &__node;					\

+\

+	while (1) {							\

+		if (__comp < 0) {					\

+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\

+			if (__tmp == nil)				\

+				break;					\

+			if (__comp < 0){				\

+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\

+				if (SPLAY_LEFT((head)->sph_root, field) == nil)\

+					break;				\

+			}						\

+			SPLAY_LINKLEFT(head, __right, field);		\

+		} else if (__comp > 0) {				\

+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\

+			if (__tmp == nil)				\

+				break;					\

+			if (__comp > 0) {				\

+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\

+				if (SPLAY_RIGHT((head)->sph_root, field) == nil)\

+					break;				\

+			}						\

+			SPLAY_LINKRIGHT(head, __left, field);		\

+		}							\

+	}								\

+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\

+}

+#define SPLAY_NEGINF	-1	/* direction argument passed by SPLAY_MIN */

+#define SPLAY_INF	1	/* direction argument passed by SPLAY_MAX */

+#define SPLAY_INSERT(name, x, y)	name##_SPLAY_INSERT(x, y)	/* forwards to the function generated for tree type name */

+#define SPLAY_REMOVE(name, x, y)	name##_SPLAY_REMOVE(x, y)

+#define SPLAY_FIND(name, x, y)		name##_SPLAY_FIND(x, y)

+#define SPLAY_NEXT(name, x, y)		name##_SPLAY_NEXT(x, y)

+#define SPLAY_MIN(name, x)		(SPLAY_EMPTY(x) ? nil	/* nil when the tree is empty */	\

+					: name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))

+#define SPLAY_MAX(name, x)		(SPLAY_EMPTY(x) ? nil	/* nil when the tree is empty */	\

+					: name##_SPLAY_MIN_MAX(x, SPLAY_INF))

+#define SPLAY_FOREACH(x, name, head)	/* loop header: x runs from SPLAY_MIN through SPLAY_NEXT until nil */	\

+	for ((x) = SPLAY_MIN(name, head);				\

+	     (x) != nil;						\

+	     (x) = SPLAY_NEXT(name, head, x))

+/* Macros that define a red-black tree */

+#define RB_HEAD(name, type)	/* declares struct name, the tree head */	\

+struct name {								\

+	struct type *rbh_root; /* root of the tree */			\

+}

+#define RB_INITIALIZER(root)	/* initializer for an empty head; the argument is not used */	\

+	{ nil }

+#define RB_INIT(root) do {	/* make the tree at root empty */	\

+	(root)->rbh_root = nil;					\

+} while (/*CONSTCOND*/ 0)

+#define RB_BLACK	0	/* values stored in rbe_color */

+#define RB_RED		1

+#define RB_ENTRY(type)	/* anonymous struct of link fields, to be embedded in each node */	\

+struct {								\

+	struct type *rbe_left;		/* left element */		\

+	struct type *rbe_right;		/* right element */		\

+	struct type *rbe_parent;	/* parent element */		\

+	int rbe_color;			/* node color */		\

+}

+#define RB_LEFT(elm, field)		(elm)->field.rbe_left	/* accessors; field names the RB_ENTRY member of elm */

+#define RB_RIGHT(elm, field)		(elm)->field.rbe_right

+#define RB_PARENT(elm, field)		(elm)->field.rbe_parent

+#define RB_COLOR(elm, field)		(elm)->field.rbe_color

+#define RB_ROOT(head)			(head)->rbh_root

+#define RB_EMPTY(head)			(RB_ROOT(head) == nil)

+#define RB_SET(elm, parent, field) do {	/* initialize elm as a red node without children below parent */	\

+	RB_PARENT(elm, field) = parent;					\

+	RB_LEFT(elm, field) = RB_RIGHT(elm, field) = nil;		\

+	RB_COLOR(elm, field) = RB_RED;					\

+} while (/*CONSTCOND*/ 0)

+#define RB_SET_BLACKRED(black, red, field) do {	/* color the first node black and the second one red */	\

+	RB_COLOR(black, field) = RB_BLACK;				\

+	RB_COLOR(red, field) = RB_RED;					\

+} while (/*CONSTCOND*/ 0)

+#ifndef RB_AUGMENT	/* hook invoked on nodes after their links change; a no-op unless defined beforehand */

+#define RB_AUGMENT(x)	do {} while (0)

+#endif

+#define RB_ROTATE_LEFT(head, elm, tmp, field) do {	/* rotate left at elm; tmp is set to elm's right child, which becomes elm's parent */	\

+	(tmp) = RB_RIGHT(elm, field);					\

+	if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != nil) {	/* tmp's left subtree becomes elm's right subtree */	\

+		RB_PARENT(RB_LEFT(tmp, field), field) = (elm);		\

+	}								\

+	RB_AUGMENT(elm);						\

+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != nil) {	/* tmp takes elm's slot below elm's old parent */	\

+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\

+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\

+		else							\

+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\

+	} else	/* elm had no parent: tmp is the new root */	\

+		(head)->rbh_root = (tmp);				\

+	RB_LEFT(tmp, field) = (elm);	/* elm becomes tmp's left child */	\

+	RB_PARENT(elm, field) = (tmp);					\

+	RB_AUGMENT(tmp);						\

+	if ((RB_PARENT(tmp, field)))					\

+		RB_AUGMENT(RB_PARENT(tmp, field));			\

+} while (/*CONSTCOND*/ 0)

+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do {	/* rotate right at elm; tmp is set to elm's left child, which becomes elm's parent */	\

+	(tmp) = RB_LEFT(elm, field);					\

+	if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != nil) {	/* tmp's right subtree becomes elm's left subtree */	\

+		RB_PARENT(RB_RIGHT(tmp, field), field) = (elm);		\

+	}								\

+	RB_AUGMENT(elm);						\

+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != nil) {	/* tmp takes elm's slot below elm's old parent */	\

+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\

+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\

+		else							\

+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\

+	} else	/* elm had no parent: tmp is the new root */	\

+		(head)->rbh_root = (tmp);				\

+	RB_RIGHT(tmp, field) = (elm);	/* elm becomes tmp's right child */	\

+	RB_PARENT(elm, field) = (tmp);					\

+	RB_AUGMENT(tmp);						\

+	if ((RB_PARENT(tmp, field)))					\

+		RB_AUGMENT(RB_PARENT(tmp, field));			\

+} while (/*CONSTCOND*/ 0)

+/* Generates prototypes for the functions emitted by the RB_GENERATE macros */

+#define	RB_PROTOTYPE(name, type, field, cmp)	/* prototypes with no storage class */	\

+	RB_PROTOTYPE_INTERNAL(name, type, field, cmp,)

+#define	RB_PROTOTYPE_STATIC(name, type, field, cmp)	/* same prototypes, declared static */	\

+	RB_PROTOTYPE_INTERNAL(name, type, field, cmp, static)

+#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr)	/* attr is placed in front of every prototype; field and cmp are not used here */	\

+	RB_PROTOTYPE_INSERT_COLOR(name, type, attr);			\

+	RB_PROTOTYPE_REMOVE_COLOR(name, type, attr);			\

+	RB_PROTOTYPE_INSERT(name, type, attr);				\

+	RB_PROTOTYPE_REMOVE(name, type, attr);				\

+	RB_PROTOTYPE_FIND(name, type, attr);				\

+	RB_PROTOTYPE_NFIND(name, type, attr);				\

+	RB_PROTOTYPE_NEXT(name, type, attr);				\

+	RB_PROTOTYPE_PREV(name, type, attr);				\

+	RB_PROTOTYPE_MINMAX(name, type, attr);

+#define RB_PROTOTYPE_INSERT_COLOR(name, type, attr)	/* each macro below expands to one prototype without the trailing semicolon */	\

+	attr void name##_RB_INSERT_COLOR(struct name *, struct type *)

+#define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr)			\

+	attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *)

+#define RB_PROTOTYPE_REMOVE(name, type, attr)				\

+	attr struct type *name##_RB_REMOVE(struct name *, struct type *)

+#define RB_PROTOTYPE_INSERT(name, type, attr)				\

+	attr struct type *name##_RB_INSERT(struct name *, struct type *)

+#define RB_PROTOTYPE_FIND(name, type, attr)				\

+	attr struct type *name##_RB_FIND(struct name *, struct type *)

+#define RB_PROTOTYPE_NFIND(name, type, attr)				\

+	attr struct type *name##_RB_NFIND(struct name *, struct type *)

+#define RB_PROTOTYPE_NEXT(name, type, attr)				\

+	attr struct type *name##_RB_NEXT(struct type *)

+#define RB_PROTOTYPE_PREV(name, type, attr)				\

+	attr struct type *name##_RB_PREV(struct type *)

+#define RB_PROTOTYPE_MINMAX(name, type, attr)				\

+	attr struct type *name##_RB_MINMAX(struct name *, int)

+/* Red-black tree code generators.

+ * RB_GENERATE expands to the definitions of the functions declared by RB_PROTOTYPE.

+ */

+#define	RB_GENERATE(name, type, field, cmp)	/* definitions with no storage class */	\

+	RB_GENERATE_INTERNAL(name, type, field, cmp,)

+#define	RB_GENERATE_STATIC(name, type, field, cmp)	/* same definitions, declared static */	\

+	RB_GENERATE_INTERNAL(name, type, field, cmp, static)

+#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr)	/* attr is placed in front of every definition; cmp is the node comparison */	\

+	RB_GENERATE_INSERT_COLOR(name, type, field, attr)		\

+	RB_GENERATE_REMOVE_COLOR(name, type, field, attr)		\

+	RB_GENERATE_INSERT(name, type, field, cmp, attr)		\

+	RB_GENERATE_REMOVE(name, type, field, attr)			\

+	RB_GENERATE_FIND(name, type, field, cmp, attr)			\

+	RB_GENERATE_NFIND(name, type, field, cmp, attr)			\

+	RB_GENERATE_NEXT(name, type, field, attr)			\

+	RB_GENERATE_PREV(name, type, field, attr)			\

+	RB_GENERATE_MINMAX(name, type, field, attr)

+#define RB_GENERATE_INSERT_COLOR(name, type, field, attr)	/* emits name##_RB_INSERT_COLOR: recolor and rotate upward from elm */	\

+attr void								\

+name##_RB_INSERT_COLOR(struct name *head, struct type *elm)		\

+{									\

+	struct type *parent, *gparent, *tmp;				\

+	while ((parent = RB_PARENT(elm, field)) != nil &&	/* loop while elm has a red parent */	\

+	    RB_COLOR(parent, field) == RB_RED) {			\

+		gparent = RB_PARENT(parent, field);			\

+		if (parent == RB_LEFT(gparent, field)) {		\

+			tmp = RB_RIGHT(gparent, field);	/* tmp: gparent's other child */	\

+			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	/* it is red: recolor and continue from gparent */	\

+				RB_COLOR(tmp, field) = RB_BLACK;	\

+				RB_SET_BLACKRED(parent, gparent, field);\

+				elm = gparent;				\

+				continue;				\

+			}						\

+			if (RB_RIGHT(parent, field) == elm) {	/* elm is a right child: rotate at parent, then swap the names elm and parent */	\

+				RB_ROTATE_LEFT(head, parent, tmp, field);\

+				tmp = parent;				\

+				parent = elm;				\

+				elm = tmp;				\

+			}						\

+			RB_SET_BLACKRED(parent, gparent, field);	\

+			RB_ROTATE_RIGHT(head, gparent, tmp, field);	\

+		} else {	/* mirror image: parent is gparent's right child */	\

+			tmp = RB_LEFT(gparent, field);			\

+			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	\

+				RB_COLOR(tmp, field) = RB_BLACK;	\

+				RB_SET_BLACKRED(parent, gparent, field);\

+				elm = gparent;				\

+				continue;				\

+			}						\

+			if (RB_LEFT(parent, field) == elm) {		\

+				RB_ROTATE_RIGHT(head, parent, tmp, field);\

+				tmp = parent;				\

+				parent = elm;				\

+				elm = tmp;				\

+			}						\

+			RB_SET_BLACKRED(parent, gparent, field);	\

+			RB_ROTATE_LEFT(head, gparent, tmp, field);	\

+		}							\

+	}								\

+	RB_COLOR(head->rbh_root, field) = RB_BLACK;	/* the root is always left black */	\

+}

+#define RB_GENERATE_REMOVE_COLOR(name, type, field, attr)	/* emits name##_RB_REMOVE_COLOR: recolor and rotate after a black node was unlinked; elm may be nil, parent is its parent */	\

+attr void								\

+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \

+{									\

+	struct type *tmp;						\

+	while ((elm == nil || RB_COLOR(elm, field) == RB_BLACK) &&	/* loop while elm is nil or black, is not the root, and has a parent */	\

+	    elm != RB_ROOT(head) && parent != nil) {					\

+		if (RB_LEFT(parent, field) == elm) {			\

+			tmp = RB_RIGHT(parent, field);	/* tmp: parent's other child */	\

+			if (RB_COLOR(tmp, field) == RB_RED) {	/* it is red: make it black and parent red, rotate, reload tmp */	\

+				RB_SET_BLACKRED(tmp, parent, field);	\

+				RB_ROTATE_LEFT(head, parent, tmp, field);\

+				tmp = RB_RIGHT(parent, field);		\

+			}						\

+			if ((RB_LEFT(tmp, field) == nil ||	/* both children of tmp are nil or black */	\

+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\

+			    (RB_RIGHT(tmp, field) == nil ||		\

+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\

+				RB_COLOR(tmp, field) = RB_RED;	/* color tmp red and continue one level up */	\

+				elm = parent;				\

+				parent = RB_PARENT(elm, field);		\

+			} else {					\

+				if (RB_RIGHT(tmp, field) == nil ||	/* tmp's right child is nil or black: rotate right at tmp first */	\

+				    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\

+					struct type *oleft;		\

+					if ((oleft = RB_LEFT(tmp, field)) \

+					    != nil)			\

+						RB_COLOR(oleft, field) = RB_BLACK;\

+					RB_COLOR(tmp, field) = RB_RED;	\

+					RB_ROTATE_RIGHT(head, tmp, oleft, field);\

+					tmp = RB_RIGHT(parent, field);	\

+				}					\

+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);	/* tmp takes parent's color, parent turns black */\

+				RB_COLOR(parent, field) = RB_BLACK;	\

+				if (RB_RIGHT(tmp, field))		\

+					RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\

+				RB_ROTATE_LEFT(head, parent, tmp, field);\

+				elm = RB_ROOT(head);	/* finished; the root is blackened after the loop */	\

+				break;					\

+			}						\

+		} else {	/* mirror image: elm is not parent's left child */	\

+			tmp = RB_LEFT(parent, field);			\

+			if (RB_COLOR(tmp, field) == RB_RED) {		\

+				RB_SET_BLACKRED(tmp, parent, field);	\

+				RB_ROTATE_RIGHT(head, parent, tmp, field);\

+				tmp = RB_LEFT(parent, field);		\

+			}						\

+			if ((RB_LEFT(tmp, field) == nil ||		\

+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\

+			    (RB_RIGHT(tmp, field) == nil ||		\

+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\

+				RB_COLOR(tmp, field) = RB_RED;		\

+				elm = parent;				\

+				parent = RB_PARENT(elm, field);		\

+			} else {					\

+				if (RB_LEFT(tmp, field) == nil ||	\

+				    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\

+					struct type *oright;		\

+					if ((oright = RB_RIGHT(tmp, field)) \

+					    != nil)			\

+						RB_COLOR(oright, field) = RB_BLACK;\

+					RB_COLOR(tmp, field) = RB_RED;	\

+					RB_ROTATE_LEFT(head, tmp, oright, field);\

+					tmp = RB_LEFT(parent, field);	\

+				}					\

+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\

+				RB_COLOR(parent, field) = RB_BLACK;	\

+				if (RB_LEFT(tmp, field))		\

+					RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\

+				RB_ROTATE_RIGHT(head, parent, tmp, field);\

+				elm = RB_ROOT(head);			\

+				break;					\

+			}						\

+		}							\

+	}								\

+	if (elm)	/* the node the loop stopped at ends up black */	\

+		RB_COLOR(elm, field) = RB_BLACK;			\

+}

+/* Emits name##_RB_REMOVE: unlinks elm from the tree at head and returns elm. */

+#define RB_GENERATE_REMOVE(name, type, field, attr)			\

+attr struct type *							\

+name##_RB_REMOVE(struct name *head, struct type *elm)			\

+{									\

+	struct type *child, *parent, *old = elm;			\

+	int color;							\

+	if (RB_LEFT(elm, field) == nil)	/* at most one child: spliced out below */	\

+		child = RB_RIGHT(elm, field);				\

+	else if (RB_RIGHT(elm, field) == nil)				\

+		child = RB_LEFT(elm, field);				\

+	else {	/* two children: the leftmost node of the right subtree replaces old */	\

+		struct type *left;					\

+		elm = RB_RIGHT(elm, field);				\

+		while ((left = RB_LEFT(elm, field)) != nil)		\

+			elm = left;					\

+		child = RB_RIGHT(elm, field);				\

+		parent = RB_PARENT(elm, field);				\

+		color = RB_COLOR(elm, field);	/* color of the node leaving its old position */	\

+		if (child)						\

+			RB_PARENT(child, field) = parent;		\

+		if (parent) {						\

+			if (RB_LEFT(parent, field) == elm)		\

+				RB_LEFT(parent, field) = child;		\

+			else						\

+				RB_RIGHT(parent, field) = child;	\

+			RB_AUGMENT(parent);				\

+		} else							\

+			RB_ROOT(head) = child;				\

+		if (RB_PARENT(elm, field) == old)	/* old is about to be replaced by elm */	\

+			parent = elm;					\

+		(elm)->field = (old)->field;	/* elm takes over old's links and color */	\

+		if (RB_PARENT(old, field)) {				\

+			if (RB_LEFT(RB_PARENT(old, field), field) == old)\

+				RB_LEFT(RB_PARENT(old, field), field) = elm;\

+			else						\

+				RB_RIGHT(RB_PARENT(old, field), field) = elm;\

+			RB_AUGMENT(RB_PARENT(old, field));		\

+		} else							\

+			RB_ROOT(head) = elm;				\

+		RB_PARENT(RB_LEFT(old, field), field) = elm;	/* old has a left child in this branch */	\

+		if (RB_RIGHT(old, field))				\

+			RB_PARENT(RB_RIGHT(old, field), field) = elm;	\

+		if (parent) {	/* run the augment hook from parent up to the root */	\

+			left = parent;					\

+			do {						\

+				RB_AUGMENT(left);			\

+			} while ((left = RB_PARENT(left, field)) != nil); \

+		}							\

+		goto color;						\

+	}								\

+	parent = RB_PARENT(elm, field);					\

+	color = RB_COLOR(elm, field);					\

+	if (child)							\

+		RB_PARENT(child, field) = parent;			\

+	if (parent) {							\

+		if (RB_LEFT(parent, field) == elm)			\

+			RB_LEFT(parent, field) = child;			\

+		else							\

+			RB_RIGHT(parent, field) = child;		\

+		RB_AUGMENT(parent);					\

+	} else								\

+		RB_ROOT(head) = child;					\

+color:									\

+	if (color == RB_BLACK)	/* a black node left its position: rebalance */	\

+		name##_RB_REMOVE_COLOR(head, parent, child);		\

+	return (old);							\

+}

+#define RB_GENERATE_INSERT(name, type, field, cmp, attr)		\

+/* Inserts a node into the RB tree; returns nil, or the node already holding an equal key (elm is then not inserted) */	\

+attr struct type *							\

+name##_RB_INSERT(struct name *head, struct type *elm)			\

+{									\

+	struct type *tmp;						\

+	struct type *parent = nil;					\

+	int comp = 0;							\

+	tmp = RB_ROOT(head);						\

+	while (tmp) {	/* descend to the empty slot where elm belongs */	\

+		parent = tmp;						\

+		comp = (cmp)(elm, parent);				\

+		if (comp < 0)						\

+			tmp = RB_LEFT(tmp, field);			\

+		else if (comp > 0)					\

+			tmp = RB_RIGHT(tmp, field);			\

+		else							\

+			return (tmp);					\

+	}								\

+	RB_SET(elm, parent, field);					\

+	if (parent != nil) {	/* comp still holds the result of the last comparison */	\

+		if (comp < 0)						\

+			RB_LEFT(parent, field) = elm;			\

+		else							\

+			RB_RIGHT(parent, field) = elm;			\

+		RB_AUGMENT(parent);					\

+	} else	/* the tree was empty */	\

+		RB_ROOT(head) = elm;					\

+	name##_RB_INSERT_COLOR(head, elm);				\

+	return (nil);							\

+}

+#define RB_GENERATE_FIND(name, type, field, cmp, attr)			\

+/* Finds the node with the same key as elm; returns nil when there is none */	\

+attr struct type *							\

+name##_RB_FIND(struct name *head, struct type *elm)			\

+{									\

+	struct type *tmp = RB_ROOT(head);				\

+	int comp;							\

+	while (tmp) {							\

+		comp = cmp(elm, tmp);	/* negative: go left, positive: go right, zero: found */	\

+		if (comp < 0)						\

+			tmp = RB_LEFT(tmp, field);			\

+		else if (comp > 0)					\

+			tmp = RB_RIGHT(tmp, field);			\

+		else							\

+			return (tmp);					\

+	}								\

+	return (nil);							\

+}

+#define RB_GENERATE_NFIND(name, type, field, cmp, attr)			\

+/* Finds the first node greater than or equal to the search key; returns nil when there is none */	\

+attr struct type *							\

+name##_RB_NFIND(struct name *head, struct type *elm)			\

+{									\

+	struct type *tmp = RB_ROOT(head);				\

+	struct type *res = nil;					\

+	int comp;							\

+	while (tmp) {							\

+		comp = cmp(elm, tmp);					\

+		if (comp < 0) {						\

+			res = tmp;	/* tmp is greater than the key: best candidate so far */	\

+			tmp = RB_LEFT(tmp, field);			\

+		}							\

+		else if (comp > 0)					\

+			tmp = RB_RIGHT(tmp, field);			\

+		else							\

+			return (tmp);					\

+	}								\

+	return (res);							\

+}

+#define RB_GENERATE_NEXT(name, type, field, attr)	/* emits name##_RB_NEXT: in-order successor of elm, nil after the last node */	\

+/* ARGSUSED */								\

+attr struct type *							\

+name##_RB_NEXT(struct type *elm)					\

+{									\

+	if (RB_RIGHT(elm, field)) {	/* leftmost node of the right subtree */	\

+		elm = RB_RIGHT(elm, field);				\

+		while (RB_LEFT(elm, field))				\

+			elm = RB_LEFT(elm, field);			\

+	} else {							\

+		if (RB_PARENT(elm, field) &&	/* elm is a left child: its parent comes next */	\

+		    (elm == RB_LEFT(RB_PARENT(elm, field), field)))	\

+			elm = RB_PARENT(elm, field);			\

+		else {	/* climb while elm is a right child, then step to the parent (nil at the root) */	\

+			while (RB_PARENT(elm, field) &&			\

+			    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\

+				elm = RB_PARENT(elm, field);		\

+			elm = RB_PARENT(elm, field);			\

+		}							\

+	}								\

+	return (elm);							\

+}

+#define RB_GENERATE_PREV(name, type, field, attr)	/* emits name##_RB_PREV: in-order predecessor of elm, nil before the first node */	\

+/* ARGSUSED */								\

+attr struct type *							\

+name##_RB_PREV(struct type *elm)					\

+{									\

+	if (RB_LEFT(elm, field)) {	/* rightmost node of the left subtree */	\

+		elm = RB_LEFT(elm, field);				\

+		while (RB_RIGHT(elm, field))				\

+			elm = RB_RIGHT(elm, field);			\

+	} else {							\

+		if (RB_PARENT(elm, field) &&	/* elm is a right child: its parent comes before it */	\

+		    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))	\

+			elm = RB_PARENT(elm, field);			\

+		else {	/* climb while elm is a left child, then step to the parent (nil at the root) */	\

+			while (RB_PARENT(elm, field) &&			\

+			    (elm == RB_LEFT(RB_PARENT(elm, field), field)))\

+				elm = RB_PARENT(elm, field);		\

+			elm = RB_PARENT(elm, field);			\

+		}							\

+	}								\

+	return (elm);							\

+}

+#define RB_GENERATE_MINMAX(name, type, field, attr)	/* emits name##_RB_MINMAX: leftmost node when val < 0, rightmost otherwise; nil for an empty tree */	\

+attr struct type *							\

+name##_RB_MINMAX(struct name *head, int val)				\

+{									\

+	struct type *tmp = RB_ROOT(head);				\

+	struct type *parent = nil;					\

+	while (tmp) {							\

+		parent = tmp;	/* last node visited before running off the tree */	\

+		if (val < 0)						\

+			tmp = RB_LEFT(tmp, field);			\

+		else							\

+			tmp = RB_RIGHT(tmp, field);			\

+	}								\

+	return (parent);						\

+}

+#define RB_NEGINF	-1	/* direction argument passed by RB_MIN */

+#define RB_INF	1	/* direction argument passed by RB_MAX */

+#define RB_INSERT(name, x, y)	name##_RB_INSERT(x, y)	/* x: tree head, y: node; forwards to the generated function */

+#define RB_REMOVE(name, x, y)	name##_RB_REMOVE(x, y)

+#define RB_FIND(name, x, y)	name##_RB_FIND(x, y)

+#define RB_NFIND(name, x, y)	name##_RB_NFIND(x, y)

+#define RB_NEXT(name, x, y)	name##_RB_NEXT(y)	/* x is accepted but not used */

+#define RB_PREV(name, x, y)	name##_RB_PREV(y)	/* x is accepted but not used */

+#define RB_MIN(name, x)		name##_RB_MINMAX(x, RB_NEGINF)

+#define RB_MAX(name, x)		name##_RB_MINMAX(x, RB_INF)

+#define RB_FOREACH(x, name, head)	/* loop header: x runs from RB_MIN in ascending order until nil */	\

+	for ((x) = RB_MIN(name, head);					\

+	     (x) != nil;						\

+	     (x) = name##_RB_NEXT(x))

+#define RB_FOREACH_FROM(x, name, y)	/* starts at y; y is overwritten with x's successor before each iteration of the body */	\

+	for ((x) = (y);							\

+	    ((x) != nil) && ((y) = name##_RB_NEXT(x), (x) != nil);	\

+	     (x) = (y))

+#define RB_FOREACH_SAFE(x, name, head, y)	/* like RB_FOREACH, with x's successor saved in y before each iteration of the body */	\

+	for ((x) = RB_MIN(name, head);					\

+	    ((x) != nil) && ((y) = name##_RB_NEXT(x), (x) != nil);	\

+	     (x) = (y))

+#define RB_FOREACH_REVERSE(x, name, head)	/* loop header: x runs from RB_MAX in descending order until nil */	\

+	for ((x) = RB_MAX(name, head);					\

+	     (x) != nil;						\

+	     (x) = name##_RB_PREV(x))

+#define RB_FOREACH_REVERSE_FROM(x, name, y)	/* starts at y; y is overwritten with x's predecessor before each iteration of the body */	\

+	for ((x) = (y);							\

+	    ((x) != nil) && ((y) = name##_RB_PREV(x), (x) != nil);	\

+	     (x) = (y))

+#define RB_FOREACH_REVERSE_SAFE(x, name, head, y)	/* like RB_FOREACH_REVERSE, with x's predecessor saved in y before each iteration of the body */	\

+	for ((x) = RB_MAX(name, head);					\

+	    ((x) != nil) && ((y) = name##_RB_PREV(x), (x) != nil);	\

+	     (x) = (y))

--- /dev/null

+++ b/sys/src/cmd/ext4srv/mkfile

@@ -1,0 +1,61 @@

+</$objtype/mkfile

+TARG=ext4srv

+CFLAGS=$CFLAGS -D__${objtype}__ -p -Iinclude

+OFILES=\

+	ext4.$O\

+	ext4_balloc.$O\

+	ext4_bcache.$O\

+	ext4_bitmap.$O\

+	ext4_block_group.$O\

+	ext4_blockdev.$O\

+	ext4_crc32.$O\

+	ext4_debug.$O\

+	ext4_dir.$O\

+	ext4_dir_idx.$O\

+	ext4_extent.$O\

+	ext4_fs.$O\

+	ext4_hash.$O\

+	ext4_ialloc.$O\

+	ext4_inode.$O\

+	ext4_journal.$O\

+	ext4_mbr.$O\

+	ext4_mkfs.$O\

+	ext4_super.$O\

+	ext4_trans.$O\

+	ext4srv.$O\

+	group.$O\

+	part.$O\

+HFILES=\

+	common.h\

+	group.h\

+	include/ext4.h\

+	include/ext4_balloc.h\

+	include/ext4_bcache.h\

+	include/ext4_bitmap.h\

+	include/ext4_block_group.h\

+	include/ext4_blockdev.h\

+	include/ext4_config.h\

+	include/ext4_crc32.h\

+	include/ext4_debug.h\

+	include/ext4_dir.h\

+	include/ext4_dir_idx.h\

+	include/ext4_extent.h\

+	include/ext4_fs.h\

+	include/ext4_hash.h\

+	include/ext4_ialloc.h\

+	include/ext4_inode.h\

+	include/ext4_journal.h\

+	include/ext4_mbr.h\

+	include/ext4_misc.h\

+	include/ext4_mkfs.h\

+	include/ext4_super.h\

+	include/ext4_trans.h\

+	include/ext4_types.h\

+	include/queue.h\

+	include/tree.h\

+BIN=/$objtype/bin

+</sys/src/cmd/mkone

--- /dev/null

+++ b/sys/src/cmd/ext4srv/part.c

@@ -1,0 +1,454 @@

+#include "ext4_config.h"

+#include "ext4.h"

+#include <thread.h>

+#include "ext4_mkfs.h"

+#include "group.h"

+#include "common.h"

+#define TRACE(fmt, ...) //fprint(2, fmt, __VA_ARGS__)

+#define BDEV2PART(bdev) ((bdev)->bdif->p_user)	/* user pointer of the block device interface; openpart stores the Part there */

+static struct {

+	QLock;	/* taken by every function here that reads or changes ps or id */

+	Part *ps;	/* head of the list of open parts, linked through next/prev */

+	u32int id;	/* counter; openpart puts its value in the upper 32 bits of qidmask.path */

+}sv;

+/*

+ * Read n bytes from f at offset into av, repeating pread until all n

+ * bytes have arrived. A short count is returned when pread stops early

+ * after some data was read; if the very first pread returns <= 0, that

+ * value is returned unchanged.

+ */

+static long

+preadn(int f, void *av, long n, vlong offset)

+{

+	char *dst;

+	long got, total;

+	assert(offset >= 0);

+	dst = av;

+	for(total = 0; total < n; total += got){

+		got = pread(f, dst+total, n-total, offset+total);

+		if(got <= 0)

+			return total == 0 ? got : total;

+	}

+	return total;

+}

+/* Block device "open" callback: nothing to do, the file is opened by openpart. */

+static int

+bdopen(struct ext4_blockdev *bdev)

+{

+	Part *p = BDEV2PART(bdev);

+	TRACE("bdopen %p\n", p);

+	USED(p);

+	return 0;

+}

+/*

+ * Block device "read" callback: read blkcnt physical blocks starting at

+ * block blkid into buf. Returns 0 when every byte was read, -1 otherwise.

+ */

+static int

+bdread(struct ext4_blockdev *bdev, void *buf, u64int blkid, u32int blkcnt)

+{

+	Part *p = BDEV2PART(bdev);

+	TRACE("bdread %p %p %llud %ud\n", p, buf, blkid, blkcnt);

+	return preadn(p->f, buf, blkcnt*p->bdif.ph_bsize, blkid*p->bdif.ph_bsize) != blkcnt*p->bdif.ph_bsize ? -1 : 0;

+}

+/*

+ * Block device "write" callback: write blkcnt physical blocks from buf

+ * starting at block blkid. Returns 0 when every byte was written, -1 otherwise.

+ */

+static int

+bdwrite(struct ext4_blockdev *bdev, const void *buf, u64int blkid, u32int blkcnt)

+{

+	Part *p = BDEV2PART(bdev);

+	TRACE("bdwrite %p %p %llud %ud\n", p, buf, blkid, blkcnt);

+	return pwrite(p->f, buf, blkcnt*p->bdif.ph_bsize, blkid*p->bdif.ph_bsize) != blkcnt*p->bdif.ph_bsize ? -1 : 0;

+}

+/* Block device "close" callback: nothing to do, the file is closed by _closepart. */

+static int

+bdclose(struct ext4_blockdev *bdev)

+{

+	Part *p = BDEV2PART(bdev);

+	TRACE("bdclose %p\n", p);

+	USED(p);

+	return 0;

+}

+/*

+ * Find the block size of dev by reading the "geometry" line of the ctl

+ * file in the same directory as dev; the third field of that line is

+ * taken as the size.

+ * *blksz stays at 512 when the ctl file cannot be opened or has no

+ * geometry line. Returns 0 on success; -1 on allocation failure or when

+ * the geometry value is not a positive number that fits in *blksz.

+ */

+static int

+getblksz(char *dev, u32int *blksz)

+{

+	char *s, *e, *g, *a[5];

+	vlong x;

+	int f, n, r;

+	/* default blksz if couldn't find out the real one */

+	*blksz = 512;

+	/* the "_ctl" suffix only reserves room to overwrite the last path element with "/ctl" */

+	if((s = smprint("%s_ctl", dev)) == nil)

+		return -1;

+	cleanname(s);

+	if((e = strrchr(s, '/')) == nil)

+		e = s;

+	strcpy(e, "/ctl");

+	f = open(s, OREAD);

+	free(s);

+	if(f < 0)

+		return 0;

+	if((g = malloc(4096)) == nil){

+		close(f);

+		return -1;

+	}

+	for(n = 0; (r = read(f, g+n, 4096-n-1)) > 0; n += r);

+	g[n] = 0;

+	close(f);

+	for(s = g; (e = strchr(s, '\n')) != nil; s = e+1){

+		*e = 0;

+		if(tokenize(s, a, nelem(a)) >= 3 && strcmp(a[0], "geometry") == 0){

+			x = strtoll(a[2], &e, 0);

+			if(x > 0 && *e == 0)

+				*blksz = x;

+			if(*blksz != x){

+				/*

+				 * no close() on an invalid fd after this point:

+				 * a failing close would replace this error string

+				 */

+				werrstr("invalid block size: %s", a[2]);

+				free(g);

+				return -1;

+			}

+			break;

+		}

+	}

+	free(g);

+	return 0;

+}

+/*

+ * Format routine for Part*: verb 'M' prints "/" followed by qid.path in

+ * hex, any other verb it is installed for prints "dev" followed by the

+ * same number.

+ */

+static int

+fmtpart(Fmt *f)

+{

+	Part *p;

+	char *tmpl;

+	p = va_arg(f->args, Part*);

+	if(f->r == 'M')

+		tmpl = "/%#llux";

+	else

+		tmpl = "dev%#llux";

+	return fmtprint(f, tmpl, p->qid.path);

+}

+/*

+ * Read the whole file path (relative to the mount point of p; leading

+ * slashes are skipped) into a malloc'ed buffer.

+ * On success returns the buffer, which the caller frees, and stores the

+ * number of bytes read in *sz; the buffer has one extra byte, set to 0.

+ * On failure returns nil and stores 0 in *sz.

+ */

+static void *

+readfile(Part *p, char *path, usize *sz)

+{

+	usize n, got, size;

+	char *s, *d;

+	ext4_file f;

+	int r;

+	*sz = 0;

+	while(*path == '/')

+		path++;

+	/* smprint can fail; do not hand a nil name to ext4_fopen2 */

+	if((s = smprint("%M/%s", p, path)) == nil)

+		return nil;

+	r = ext4_fopen2(&f, s, O_RDONLY);

+	free(s);

+	if(r != 0)

+		return nil;

+	size = ext4_fsize(&f);

+	if((d = malloc(size+1)) == nil){

+		ext4_fclose(&f);

+		return nil;

+	}

+	for(n = 0; n < size; n += got){

+		if(ext4_fread(&f, d+n, size-n, &got) < 0){

+			werrstr("readfile: %r");

+			ext4_fclose(&f);

+			free(d);

+			return nil;

+		}

+		/* nothing more to read although fewer than size bytes arrived */

+		if(got == 0)

+			break;

+	}

+	ext4_fclose(&f);

+	d[n] = 0;

+	*sz = n;

+	return d;

+}

+/*

+ * Register the block device of p, mount it, set up locks, run recovery,

+ * start the journal, fetch the superblock and load the group table

+ * (from opts->group if set, otherwise from /etc/group on the file

+ * system when that file can be read).

+ * Returns 0 on success. On failure returns -1 with errstr set, after

+ * undoing the steps that had already succeeded, so the caller may free p.

+ */

+static int

+mountpart(Part *p, Opts *opts)

+{

+	char err[ERRMAX];

+	usize sz;

+	char *gr;

+	int r, registered, mounted, journal;

+	r = 0;

+	registered = mounted = journal = 0;

+	if(snprint(p->dev, sizeof(p->dev), "%Ð", p) >= sizeof(p->dev)){

+		werrstr("part path too long");

+		goto error;

+	}

+	if(snprint(p->mnt, sizeof(p->mnt), "%M/", p) >= sizeof(p->mnt)){

+		werrstr("part path too long");

+		goto error;

+	}

+	if(ext4_device_register(&p->bdev, p->dev) < 0){

+		werrstr("register: %r");

+		goto error;

+	}

+	registered = 1;

+	if(ext4_mount(p->dev, p->mnt, opts->rdonly) < 0){

+		werrstr("mount: %r");

+		goto error;

+	}

+	mounted = 1;

+	if(ext4_mount_setup_locks(p->mnt, &p->oslocks) < 0){

+		werrstr("locks: %r");

+		goto error;

+	}

+	if(ext4_recover(p->mnt) < 0){

+		werrstr("recover: %r");

+		goto error;

+	}

+	if(ext4_journal_start(p->mnt) < 0){

+		werrstr("journal: %r");

+		goto error;

+	}

+	journal = 1;

+	if(opts->cachewb)

+		ext4_cache_write_back(p->mnt, 1);

+	if(ext4_get_sblock(p->mnt, &p->sb) < 0){

+		werrstr("sblock: %r");

+		goto error;

+	}

+	if(opts->group != nil){

+		r = loadgroups(&p->groups, opts->group);

+	}else if((gr = readfile(p, "/etc/group", &sz)) != nil){

+		gr[sz] = 0;

+		r = loadgroups(&p->groups, gr);

+		free(gr);

+	}

+	if(r != 0)

+		goto error;

+	return 0;

+error:

+	/* keep the original error: the teardown calls may set errstr themselves */

+	rerrstr(err, sizeof(err));

+	/* undo in the same order _closepart uses */

+	if(journal){

+		if(opts->cachewb)

+			ext4_cache_write_back(p->mnt, 0);

+		ext4_journal_stop(p->mnt);

+	}

+	if(mounted)

+		ext4_umount(p->mnt);

+	if(registered)

+		ext4_device_unregister(p->dev);

+	werrstr("mountpart: %s", err);

+	return -1;

+}

+/* Lock callback installed in oslocks: aux is the Part to lock. */

+static void

+plock(void *aux)

+{

+	Part *p = aux;

+	qlock(p);

+}

+/* Unlock callback installed in oslocks: aux is the Part to unlock. */

+static void

+punlock(void *aux)

+{

+	Part *p = aux;

+	qunlock(p);

+}

+/*

+ * Open the ext4 file system stored in dev and return its Part.

+ * If a Part with the same qid.path is already on sv.ps, that one is

+ * returned and the newly opened fd is closed. Otherwise a new Part is

+ * allocated (block buffer and a copy of dev live in the same

+ * allocation), formatted first when opts->fstype > 1, mounted, and

+ * pushed on the front of sv.ps.

+ * Returns nil with errstr set on failure.

+ */

+Part *

+openpart(char *dev, Opts *opts)

+{

+	struct ext4_mkfs_info info;

+	struct ext4_fs fs;

+	u32int blksz;

+	Part *p;

+	Dir *d;

+	int f;

+	d = nil;

+	p = nil;

+	qlock(&sv);

+	fmtinstall(L'Ð', fmtpart);

+	fmtinstall('M', fmtpart);

+	f = open(dev, ORDWR);

+	if(f < 0 || (d = dirfstat(f)) == nil)

+		goto error;

+	/* see if it's already opened */

+	for(p = sv.ps; p != nil && p->qid.path != d->qid.path; p = p->next);

+	if(p == nil){ /* no? then make one */

+		if(getblksz(dev, &blksz) != 0 || (p = calloc(1, sizeof(*p)+blksz+strlen(dev)+1)) == nil)

+			goto error;

+		p->f = f;

+		p->qid = d->qid;

+		p->bdev.bdif = &p->bdif;

+		p->bdev.part_size = d->length;

+		p->bdif.open = bdopen;

+		p->bdif.bread = bdread;

+		p->bdif.bwrite = bdwrite;

+		p->bdif.close = bdclose;

+		p->bdif.ph_bsize = blksz;

+		p->bdif.ph_bcnt = d->length/blksz;

+		p->bdif.ph_bbuf = p->blkbuf;

+		p->oslocks.lock = plock;

+		p->oslocks.unlock = punlock;

+		p->oslocks.p_user = p;

+		p->bdif.p_user = p;

+		/* the device name is stored after the Part and its blksz bytes */

+		p->partdev = (char*)(p+1) + blksz;

+		strcpy(p->partdev, dev);

+		if(opts->fstype > 1){

+			memset(&fs, 0, sizeof(fs));

+			memset(&info, 0, sizeof(info));

+			info.block_size = opts->blksz;

+			/* the label is data, never a format string */

+			snprint(info.label, sizeof(info.label), "%s", opts->label);

+			info.inode_size = opts->inodesz;

+			info.inodes = opts->ninode;

+			info.journal = true;

+			if(ext4_mkfs(&fs, &p->bdev, &info, opts->fstype) < 0){

+				werrstr("mkfs: %r");

+				goto error;

+			}

+		}

+		if(mountpart(p, opts) != 0)

+			goto error;

+		p->next = sv.ps;

+		if(sv.ps != nil)

+			sv.ps->prev = p;

+		sv.ps = p;

+		p->qidmask.path = ((uvlong)sv.id++) << 32;

+		p->qidmask.type = QTDIR;

+	}else{

+		close(f);

+	}

+	free(d);

+	qunlock(&sv);

+	return p;

+error:

+	/* p is only non-nil here when it was allocated above and never linked */

+	werrstr("openpart: %r");

+	if(f >= 0)

+		close(f);

+	free(d);

+	free(p);

+	qunlock(&sv);

+	return nil;

+}

+/*

+ * Tear down p: turn off write-back, stop the journal, unmount,

+ * unregister the device, close the file, unlink p from sv.ps and free it.

+ * Failures of the ext4 calls are reported on fd 2 and teardown continues.

+ * Both callers hold sv around this call.

+ */

+static void

+_closepart(Part *p)

+{

+	ext4_cache_write_back(p->mnt, 0);

+	if(ext4_journal_stop(p->mnt) < 0)

+		fprint(2, "closepart: journal %s: %r\n", p->mnt);

+	if(ext4_umount(p->mnt) < 0)

+		fprint(2, "closepart: umount %s: %r\n", p->mnt);

+	if(ext4_device_unregister(p->dev) < 0)

+		fprint(2, "closepart: unregister %s: %r\n", p->dev);

+	close(p->f);

+	/* unlink: the predecessor must skip p, otherwise it keeps pointing at freed memory */

+	if(p->prev != nil)

+		p->prev->next = p->next;

+	if(p->next != nil)

+		p->next->prev = p->prev;

+	if(p == sv.ps)

+		sv.ps = p->next;

+	freegroups(&p->groups);

+	free(p);

+}

+/* Close a single part; p is freed and must not be used afterwards. */

+void

+closepart(Part *p)

+{

+	qlock(&sv);

+	_closepart(p);

+	qunlock(&sv);

+}

+/* Close every open part; each _closepart call removes the current list head. */

+void

+closeallparts(void)

+{

+	Part *head;

+	qlock(&sv);

+	for(head = sv.ps; head != nil; head = sv.ps)

+		_closepart(head);

+	qunlock(&sv);

+}

+/*

+ * Print inode and block usage of every open part to standard output,

+ * followed by the block figures converted to megabytes.

+ * A part whose statistics cannot be read is reported on fd 2 instead.

+ */

+void

+statallparts(void)

+{

+	struct ext4_mount_stats s;

+	uvlong bsz;

+	Part *p;

+	qlock(&sv);

+	for(p = sv.ps; p != nil; p = p->next){

+		if(ext4_mount_point_stats(p->mnt, &s) < 0){

+			fprint(2, "%s: %r\n", p->partdev);

+		}else{

+			print(

+				"%s (inodes) free %ud, used %ud, total %ud\n",

+				p->partdev,

+				s.free_inodes_count,

+				s.inodes_count-s.free_inodes_count,

+				s.inodes_count

+			);

+			print(

+				"%s (blocks) free %llud, used %llud, total %llud, each %ud\n",

+				p->partdev,

+				s.free_blocks_count,

+				s.blocks_count-s.free_blocks_count,

+				s.blocks_count, s.block_size

+			);

+			/*

+			 * blocks*size/MB gives the same numbers as dividing by

+			 * 1024/(size/1024) for power-of-two sizes from 1K to 1M,

+			 * without a zero divisor for sizes outside that range

+			 */

+			bsz = s.block_size;

+			print(

+				"%s (MB) free %llud, used %llud, total %llud\n",

+				p->partdev,

+				s.free_blocks_count*bsz/(1024*1024),

+				(s.blocks_count-s.free_blocks_count)*bsz/(1024*1024),

+				s.blocks_count*bsz/(1024*1024)

+			);

+		}

+	}

+	qunlock(&sv);

+}

+/* Flush the cache of every open part; failures are reported on fd 2. */

+void

+syncallparts(void)

+{

+	Part *p;

+	qlock(&sv);

+	p = sv.ps;

+	while(p != nil){

+		if(ext4_cache_flush(p->mnt) < 0)

+			fprint(2, "%s: %r\n", p->partdev);

+		p = p->next;

+	}

+	qunlock(&sv);

+}