shithub: riscv

Download patch

ref: 10b25b512218fa7fc7b9239bf7f70b03bbd7f4ab
parent: f9230151dd405278cd78ec0e32ba166cc418ab9a
author: Sigrid Solveig Haflínudóttir <[email protected]>
date: Mon Feb 12 14:12:02 EST 2024

replace ext2srv with ext4srv

--- a/sys/man/4/ext2srv
+++ /dev/null
@@ -1,110 +1,0 @@
-.TH EXT2SRV 4
-.SH NAME
-ext2srv \- ext2 file system
-.SH SYNOPSIS
-.B ext2srv
-[
-.B -vrs
-] [
-.B -f
-.I file
-] [
-.B -p
-.I passwd
-] [
-.B -g
-.I group
-] [
-.I service
-]
-.SH DESCRIPTION
-.I Ext2srv
-is a file server that interprets the Linux Second Extended File System.
-A single instance of
-.I ext2srv
-can provide access to multiple ext2 partitions simultaneously.
-.PP
-.I Ext2srv
-posts a file descriptor named
-.I service
-(default
-.BR ext2 )
-in the
-.B /srv
-directory.
-To access an ext2 file system on a device, use
-.B mount
-with the
-.I spec
-argument
-(see
-.IR bind (1))
-the name of the file holding the raw ext2 file system, typically the disk or partition.
-If
-.I spec
-is undefined in the
-.BR mount ,
-.I ext2srv
-will use
-.I file
-as the default name for the device holding the file system.
-.PP
-Normally
-.I ext2srv
-creates a pipe to act as the communications channel between
-itself and its clients.
-The
-.B -s
-flag instructs
-.I ext2srv
-to use its standard input and output instead.
-This flag also prevents the creation of an explicit service file in
-.BR /srv .
-.PP
-The
-.B -v
-flag causes verbose output for debugging, while
-the
-.B -r
-flag (recommended) makes the file system read-only.
-The optional
-.B -p
-and
-.B -g
-flags specify Unix-format password (respectively group) files
-that give the mapping between the numeric user- and group-ID
-numbers in the ext2 file system and the strings reported by Plan 9 status
-inquiries.
-.PP
-There is no authentication or permission checking.
-Anyone who can access the ext2 file system will have full access
-to all its files, including write access if
-.I ext2srv
-is not started with the
-.B -r
-flag, irrespective of file ownership and permission flags.
-.PP
-Some file system state is cached in memory, and may
-be flushed only when the file system is unmounted.
-Therefore if
-.I ext2srv
-is stopped or the machine is rebooted while an ext2 file system
-is still mounted,
-the superblock on the device will have been marked `not valid'
-(unless the
-.B -r
-flag was used),
-and a
-.I fsck
-will be required before that file system may be mounted again.
-.SH BUGS
-There is no authentication or permission checking.
-The implementation has not tracked any changes to the ext2
-specification since it was written.
-There may be other bugs.
-It is advisable to use
-.I ext2srv
-in read-only mode whenever possible.
-.SH AUTHOR
-Bodet Laurent ([email protected]),
-with later updates by Russ Cox and Richard Miller.
--- /dev/null
+++ b/sys/man/4/ext4srv
@@ -1,0 +1,142 @@
+.TH EXT4SRV 4
+.SH NAME
+ext4srv \- ext4 file system
+.SH SYNOPSIS
+.B ext4srv
+[
+.B -Clrs
+] [
+.B -g
+.I groupfile
+] [
+.B -R
+.I uid
+] [
+.I service
+]
+.PP
+.B ext4srv
+.B -M
+.I (2|3|4)
+[
+.B -L
+.I label
+] [
+.B -b
+.I blksize
+] [
+.B -N
+.I numinodes
+] [
+.B -I
+.I inodesize
+]
+.I device
+.SH DESCRIPTION
+.I Ext4srv
+is a file server that interprets the Linux Second, Third and Fourth
+Extended File Systems.
+A single instance of
+.I ext4srv
+can provide access to multiple ext2, ext3 and ext4 partitions
+simultaneously.
+.PP
+.I Ext4srv
+posts a file descriptor named
+.I service
+(default
+.BR ext4 )
+in the
+.B /srv
+directory.
+To access an ext4 file system on a device, use
+.B mount
+with the
+.I spec
+argument
+(see
+.IR bind (1))
+the name of the file holding the raw ext4 file system, typically the disk or partition.
+If
+.I spec
+is undefined in the
+.BR mount ,
+.I ext4srv
+will use
+.I file
+as the default name for the device holding the file system.
+.PP
+Normally
+.I ext4srv
+creates a pipe to act as the communications channel between
+itself and its clients.
+The
+.B -s
+flag instructs
+.I ext4srv
+to use its standard input and output instead.
+This flag also prevents the creation of an explicit service file in
+.BR /srv .
+.PP
+The
+.B -r
+flag (recommended) makes the file system read-only.
+The optional
+.B -g
+flag specifies a Unix-format group file that gives the mapping between the
+numeric user- and group-ID numbers in the ext4 file system and the
+strings reported by Plan 9 status inquiries.
+.PP
+With the
+.B -R
+option, the file system can be mounted in "root" mode, allowing full access regardless
+of permissions. The usual
+.I uid
+in this case is
+.IR 0 .
+.PP
+The optional flag
+.B -l
+enables symlink resolution; otherwise symlinks are hidden
+entirely, as Plan 9 does not have that concept.
+.PP
+Some file system state is cached in memory, and may
+be flushed only when the file system is unmounted if the
+.B -C
+flag is used, which enables the write-back cache.
+Therefore if
+.I ext4srv
+is stopped or the machine is rebooted while an ext4 file system is
+still mounted, the superblock on the device will have been marked `not
+valid'
+(unless the
+.B -r
+flag was used).
+.SH MKFS
+A different mode of
+.I ext4srv
+is enabled with the
+.B -M
+option, which accepts the file system version
+.RI ( 2
+for
+.I ext2
+and so on).
+In this mode a file system is initialized on the specified
+.I device
+and all existing data on it is destroyed.
+.PP
+Additional options may be specified, for example
+.B -L
+may be used to set the filesystem label.
+.SH BUGS
+Yes.
+.PP
+Permission checking is very basic and may not be complete.
+There may be many bugs.
+It is advisable to use
+.I ext4srv
+in read-only mode whenever possible.
+.SH HISTORY
+.I Ext4srv
+first appeared in 9front (February, 2024).
--- a/sys/src/cmd/aux/multi/mkfile
+++ b/sys/src/cmd/aux/multi/mkfile
@@ -33,7 +33,7 @@
 	dossrv\
 	echo\
 	ed\
-	ext2srv\
+	ext4srv\
 #	fcp\
 	grep\
 	hget\
--- a/sys/src/cmd/ext2srv/chat.c
+++ /dev/null
@@ -1,53 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-#define	SIZE	1024
-#define	DOTDOT	(&fmt+1)
-
-int	chatty;
-
-void
-chat(char *fmt, ...)
-{
-	char buf[SIZE], *out;
-	va_list arg;
-
-	if (!chatty)
-		return;
-
-	va_start(arg, fmt);
-	out = vseprint(buf, buf+sizeof(buf), fmt, arg);
-	va_end(arg);
-	write(2, buf, (long)(out-buf));
-}
-
-void
-mchat(char *fmt, ...)
-{
-	char buf[SIZE], *out;
-	va_list arg;
-
-	va_start(arg, fmt);
-	out = vseprint(buf, buf+sizeof(buf), fmt, arg);
-	va_end(arg);
-	write(2, buf, (long)(out-buf));
-}
-void
-panic(char *fmt, ...)
-{
-	char buf[SIZE];
-	va_list arg;
-	int n;
-
-	n = sprint(buf, "%s %d: panic ", argv0, getpid());
-	va_start(arg, fmt);
-	vseprint(buf+n, buf+sizeof(buf)-n, fmt, arg);
-	va_end(arg);
-	fprint(2, "%s: %r\n", buf);
-	exits("panic");
-}
--- a/sys/src/cmd/ext2srv/dat.h
+++ /dev/null
@@ -1,222 +1,0 @@
-typedef struct Xfs	Xfs;
-typedef struct Xfile	Xfile;
-typedef struct Iobuf	Iobuf;
-typedef struct Ext2 Ext2;
-
-typedef struct SuperBlock SuperBlock;
-typedef struct GroupDesc GroupDesc;
-typedef struct Inode Inode;
-typedef struct DirEntry DirEntry;
-
-#define SECTORSIZE	512
-#define OFFSET_SUPER_BLOCK	1024
-
-#define EXT2_SUPER_MAGIC	0xEF53
-#define EXT2_MIN_BLOCK_SIZE  1024
-#define EXT2_MAX_BLOCK_SIZE  4096
-#define EXT2_ROOT_INODE	2
-#define EXT2_FIRST_INO		11
-#define EXT2_VALID_FS	0x0001
-#define EXT2_ERROR_FS	0x0002
-
-/*
- * Structure of the super block
- */
-struct SuperBlock {
-	uint	s_inodes_count;		/* Inodes count */
-	uint	s_blocks_count;		/* Blocks count */
-	uint	s_r_blocks_count;	/* Reserved blocks count */
-	uint	s_free_blocks_count;	/* Free blocks count */
-	uint	s_free_inodes_count;	/* Free inodes count */
-	uint	s_first_data_block;	/* First Data Block */
-	uint	s_log_block_size;	/* Block size */
-	int	s_log_frag_size;	/* Fragment size */
-	uint	s_blocks_per_group;	/* # Blocks per group */
-	uint	s_frags_per_group;	/* # Fragments per group */
-	uint	s_inodes_per_group;	/* # Inodes per group */
-	uint	s_mtime;		/* Mount time */
-	uint	s_wtime;		/* Write time */
-	ushort	s_mnt_count;		/* Mount count */
-	short	s_max_mnt_count;	/* Maximal mount count */
-	ushort	s_magic;		/* Magic signature */
-	ushort	s_state;		/* File system state */
-	ushort	s_errors;		/* Behaviour when detecting errors */
-	ushort	s_pad;
-	uint	s_lastcheck;		/* time of last check */
-	uint	s_checkinterval;	/* max. time between checks */
-	uint	s_creator_os;		/* OS */
-	uint	s_rev_level;		/* Revision level */
-	ushort	s_def_resuid;		/* Default uid for reserved blocks */
-	ushort	s_def_resgid;		/* Default gid for reserved blocks */
-	uint	s_reserved[235];	/* Padding to the end of the block */
-};
-
-/*
- * Structure of a blocks group descriptor
- */
-struct GroupDesc
-{
-	uint	bg_block_bitmap;		/* Blocks bitmap block */
-	uint	bg_inode_bitmap;		/* Inodes bitmap block */
-	uint	bg_inode_table;		/* Inodes table block */
-	ushort	bg_free_blocks_count;	/* Free blocks count */
-	ushort	bg_free_inodes_count;	/* Free inodes count */
-	ushort	bg_used_dirs_count;	/* Directories count */
-	ushort	bg_pad;
-	uint	bg_reserved[3];
-};
-
-/*
- * Constants relative to the data blocks
- */
-#define	EXT2_NDIR_BLOCKS		12
-#define	EXT2_IND_BLOCK			EXT2_NDIR_BLOCKS
-#define	EXT2_DIND_BLOCK			(EXT2_IND_BLOCK + 1)
-#define	EXT2_TIND_BLOCK			(EXT2_DIND_BLOCK + 1)
-#define	EXT2_N_BLOCKS			(EXT2_TIND_BLOCK + 1)
-
-/*
- * Structure of an inode on the disk
- */
-struct Inode {
-	ushort i_mode;		/* File mode */
-	ushort i_uid;		/* Owner Uid */
-	uint  i_size;		/* Size in bytes */
-	uint  i_atime;		/* Access time */
-	uint i_ctime;		/* Creation time */
-	uint  i_mtime;		/* Modification time */
-	uint  i_dtime;		/* Deletion Time */
-	ushort i_gid;		/* Group Id */
-	ushort i_links_count;	/* Links count */
-	uint  i_blocks;	/* Blocks count */
-	uint  i_flags;		/* File flags */
-	uint osd1;				
-	uint	i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
-	uint	i_version;	/* File version (for NFS) */
-	uint	i_file_acl;	/* File ACL */
-	uint	i_dir_acl;	/* Directory ACL */
-	uint	i_faddr;		/* Fragment address */
-	uchar osd2[12];
-};
-
-/*
- * Structure of a directory entry
- */
-#define EXT2_NAME_LEN 255
-#define DIR_REC_LEN(name_len)	(((name_len) + 8 + 3) & ~3)
-
-struct DirEntry {
-	uint	inode;			/* Inode number */
-	ushort	rec_len;		/* Directory entry length */
-	uchar	name_len;		/* Name length */
-	uchar	reserved;
-	char	name[EXT2_NAME_LEN];	/* File name */
-};
-
-#define S_IFMT  00170000
-#define S_IFLNK	 0120000
-#define S_IFREG  0100000
-#define S_IFDIR  0040000
-
-#define S_ISLNK(m)	(((m) & S_IFMT) == S_IFLNK)
-#define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
-#define S_ISDIR(m)	(((m) & S_IFMT) == S_IFDIR)
-
-#define DEFAULT_UID	200
-#define DEFAULT_GID	100
-
-struct Iobuf
-{
-	Xfs *dev;
-	long	addr;
-	Iobuf *next;
-	Iobuf *prev;
-	Iobuf *hash;
-	int busy;
-	int dirty;
-	char *iobuf;
-};
-
-struct Xfs{
-	Xfs *next;
-	char *name;		/* of file containing external f.s. */
-	Qid	qid;		/* of file containing external f.s. */
-	long	ref;		/* attach count */
-	Qid	rootqid;	/* of plan9 constructed root directory */
-	short	dev;
-	short	fmt;
-	void *ptr; 
-
-	/* data from super block */
-
-	int block_size;
-	int desc_per_block;
-	int inodes_per_group;
-	int inodes_per_block;
-	int addr_per_block;
-	int blocks_per_group;
-
-	int ngroups;
-	int superaddr, superoff;
-	int grpaddr;
-};
-
-struct Xfile{
-	Xfile *next;		/* in hash bucket */
-	long	client;
-	long	fid;
-	Xfs *	xf;
-	void *	ptr;
-
-	uint inbr;		/* inode nbr */
-	uint pinbr;	/* parrent inode */
-	ulong bufaddr;	/* addr of inode block */
-	ulong bufoffset;
-	int root;		/* true on attach for ref count */
-	int dirindex;	/* next dir entry to read */
-};
-
-#define EXT2_SUPER		1
-#define EXT2_DESC		2
-#define EXT2_BBLOCK	3
-#define EXT2_BINODE	4
-
-struct Ext2{
-	char type;
-	union{
-		SuperBlock *sb;
-		GroupDesc *gd;
-		char *bmp;
-	}u;
-	Iobuf *buf;
-};
-
-#define DESC_ADDR(xf,n)		( (xf)->grpaddr + ((n)/(xf)->desc_per_block) )
-#define DESC_OFFSET(xf,d,n)	( ((GroupDesc *)(d)) + ((n)%(xf)->desc_per_block) )
-
-enum{
-	Asis, Clean, Clunk
-};
-
-enum{
-	Enevermind,
-	Eformat,
-	Eio,
-	Enomem,
-	Enonexist,
-	Eexist,
-	Eperm,
-	Enofilsys,
-	Eauth,
-	Enospace,
-	Elink,
-	Elongname,
-	Eintern,
-	Ecorrupt,
-	Enotclean
-};
-
-extern int	chatty;
-extern int	errno;
-extern char	*deffile;
-extern int rdonly;
--- a/sys/src/cmd/ext2srv/errstr.h
+++ /dev/null
@@ -1,17 +1,0 @@
-char *errmsg[] = {
-	[Enevermind]	"never mind",
-	[Eformat]	"unknown format",
-	[Eio]		"I/O error",
-	[Enomem]	"server out of memory",
-	[Enonexist]	"file does not exist",
-	[Eexist]	"file already exist",
-	[Eperm]		"permission denied",
-	[Enofilsys]	"no file system device specified",
-	[Eauth]		"authentication failed",
-	[Enospace]	"no space on device",
-	[Elink]	"write is only allowed in regular files",
-	[Elongname]	"name is too long",
-	[Eintern]	"internal Ext2 error",
-	[Ecorrupt]	"corrupt filesystem",
-	[Enotclean] "fs not clean ... running e2fsck is recommended"	
-};
--- a/sys/src/cmd/ext2srv/ext2fs.c
+++ /dev/null
@@ -1,348 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-#define thdr	r->ifcall
-#define rhdr	r->ofcall
-
-extern int	errno;
-
-static void
-response(Req *r)
-{
-	char *err;
-
-	if (errno) {
-		err = xerrstr(errno);
-		chat("%s\n", err);
-		respond(r, err);
-	} else {
-		chat("OK\n");
-		respond(r, nil);
-	}
-}
-
-static void
-rattach(Req *r)
-{
-	Xfs *xf;
-	Xfile *root;
-
-	chat("attach(fid=%d,uname=\"%s\",aname=\"%s\",afid=\"%d\")...",
-		thdr.fid, thdr.uname, thdr.aname, thdr.afid);
-	
-	errno = 0;
-	root = xfile(r->fid, Clean);
-	if(!root){
-		errno = Enomem;
-		goto error;
-	}
-	root->xf = xf = getxfs(thdr.aname);
-	if(!xf)
-		goto error;
-	
-	/* now attach root inode */
-	if( get_inode(root, EXT2_ROOT_INODE) < 0 )
-		goto error;
-	
-	r->fid->qid.type = QTDIR;
-	r->fid->qid.vers = 0;
-	root->xf->rootqid = r->fid->qid;
-	root->pinbr = EXT2_ROOT_INODE;
-	root->root = 1;
-	rhdr.qid = r->fid->qid;
-	
-error:
-	response(r);
-}
-static char *
-rclone(Fid *fid, Fid *newfid)
-{
-	Xfile *of = xfile(fid, Asis);
-	Xfile *nf = xfile(newfid, Clean);
-
-	chat("clone(fid=%d,newfid=%d)...", fid->fid, newfid->fid);
-	errno = 0;
-	if(!of)
-		errno = Eio;
-	else if(!nf)
-		errno = Enomem;
-	else{
-		Xfile *next = nf->next;
-		*nf = *of;
-		nf->next = next;
-		nf->fid = newfid->fid;
-		nf->root = 0;
-	}
-	chat("%s\n", errno? xerrstr(errno) : "OK");
-	return errno ? xerrstr(errno) : 0;
-}
-static char *
-rwalk1(Fid *fid, char *name, Qid *qid)
-{
-	Xfile *f=xfile(fid, Asis);
-	int nr, sinbr = 0;
-
-	chat("walk1(fid=%d,name=\"%s\")...", fid->fid, name);
-	errno = 0;
-	if( !f ){
-		chat("no xfile...");
-		goto error;
-	}
-	if( !(fid->qid.type & QTDIR) ){
-		chat("qid.type=0x%x...", fid->qid.type);
-		goto error;
-	}
-	sinbr = f->pinbr;
-	if( name == 0 || name[0] == 0 || !strcmp(name, ".") ){
-		*qid = fid->qid;
-		goto ok;
-	}else if( !strcmp(name, "..") ){
-		if( fid->qid.path == f->xf->rootqid.path ){
-			chat("walkup from root...");
-			*qid = fid->qid;
-			goto ok;
-		}
-		if( get_inode(f, f->pinbr) < 0 )
-			goto error;
-		if( f->pinbr == EXT2_ROOT_INODE ){
-			*qid = f->xf->rootqid;
-			f->pinbr = EXT2_ROOT_INODE;
-		} else {
-			*qid = (Qid){f->pinbr,0,QTDIR};
-			f->inbr = f->pinbr;
-			if( (nr = get_file(f, "..")) < 0 )
-				goto error;
-			f->pinbr = nr;
-		}
-	}else{
-		f->pinbr = f->inbr;
-		if( (nr = get_file(f, name)) < 0 )
-			goto error;
-		if( get_inode(f, nr) < 0 )
-			goto error;
-		*qid = (Qid){nr,0,0};
-		if( nr == EXT2_ROOT_INODE )
-			*qid = f->xf->rootqid;
-		else if( S_ISDIR(getmode(f)) )
-			 qid->type = QTDIR;
-		/*strcpy(f->name, thdr.name);*/
-	}
-ok:
-	chat("OK\n");
-	return 0;
-error:
-	f->pinbr = sinbr;
-	chat("%s\n", xerrstr(Enonexist));
-	return xerrstr(Enonexist);
-}
-static void
-rstat(Req *r)
-{
-	Xfile *f=xfile(r->fid, Asis);
-
-	chat("stat(fid=%d)...", thdr.fid);
-	errno = 0;
-	if( !f )
-		errno = Eio;
-	else{
-		dostat(r->fid->qid, f, &r->d);
-	}
-	response(r);
-}
-static void
-rwstat(Req *r)
-{
-	Xfile *f=xfile(r->fid, Asis);
-
-	chat("wstat(fid=%d)...", thdr.fid);
-	errno = 0;
-	if( !f )
-		errno = Eio;
-	else
-		dowstat(f, &r->d);
-	response(r);	
-}
-static void
-rread(Req *r)
-{
-	Xfile *f; 
-	int nr;
-
-	chat("read(fid=%d,offset=%lld,count=%d)...",
-		thdr.fid, thdr.offset, thdr.count);
-	errno = 0;
-	if ( !(f=xfile(r->fid, Asis)) )
-		goto error;
-	if( r->fid->qid.type & QTDIR ){
-		nr = readdir(f, r->rbuf, thdr.offset, thdr.count);
-	}else
-		nr = readfile(f, r->rbuf, thdr.offset, thdr.count);
-	
-	if(nr >= 0){
-		rhdr.count = nr;
-		chat("rcnt=%d...OK\n", nr);
-		respond(r, nil);
-		return;
-	}
-error:
-	errno = Eio;
-	response(r);
-}
-static void
-rwrite(Req *r)
-{
-	Xfile *f; int nr;
-	
-	chat("write(fid=%d,offset=%lld,count=%d)...",
-		thdr.fid, thdr.offset, thdr.count);
-
-	errno = 0;
-	if (!(f=xfile(r->fid, Asis)) ){
-		errno = Eio;
-		goto error;
-	}
-	if( !S_ISREG(getmode(f)) ){
-		errno = Elink;
-		goto error;
-	}
-	nr = writefile(f, thdr.data, thdr.offset, thdr.count);
-	if(nr >= 0){	
-		rhdr.count = nr;
-		chat("rcnt=%d...OK\n", nr);
-		respond(r, nil);
-		return;
-	}
-	errno = Eio;
-error:
-	response(r);
-}
-static void
-destroyfid(Fid *fid)
-{
-	chat("destroy(fid=%d)\n", fid->fid);
-	xfile(fid, Clunk);
-	/*syncbuf(xf);*/
-}
-static void
-ropen(Req *r)
-{
-	Xfile *f;
-
-	chat("open(fid=%d,mode=%d)...", thdr.fid, thdr.mode);
-
-	errno = 0;
-	f = xfile(r->fid, Asis);
-	if( !f ){
-		errno = Eio;
-		goto error;
-	}
-	
-	if(thdr.mode & OTRUNC){
-		if( !S_ISREG(getmode(f)) ){
-			errno = Eperm;
-			goto error;
-		}
-		if(truncfile(f) < 0){
-			goto error;
-		}
-	}
-	chat("f->qid=0x%8.8lux...", r->fid->qid.path);
-	rhdr.qid = r->fid->qid;
-error:
-	response(r);
-}
-static void
-rcreate(Req *r)
-{
-	Xfile *f;
-	int inr, perm;
-
-	chat("create(fid=%d,name=\"%s\",perm=%uo,mode=%d)...",
-		thdr.fid, thdr.name, thdr.perm, thdr.mode);
-
-	errno = 0;
-	if(strcmp(thdr.name, ".") == 0 || strcmp(thdr.name, "..") == 0){
-		errno = Eperm;
-		goto error;
-	}
-	f = xfile(r->fid, Asis);
-	if( !f ){
-		errno = Eio;
-		goto error;
-	}
-	if( strlen(thdr.name) > EXT2_NAME_LEN ){
-		chat("name too long ...");
-		errno = Elongname;
-		goto error;
-	}
-
-	/* create */
-	errno = 0;
-	if( thdr.perm & DMDIR ){
-		perm = (thdr.perm & ~0777) | 
-				(getmode(f) & thdr.perm & 0777);
-		perm |= S_IFDIR;
-		inr = create_dir(f, thdr.name, perm);
-	}else{
-		perm = (thdr.perm & (~0777|0111)) |
-				(getmode(f) & thdr.perm & 0666);
-		perm |= S_IFREG;
-		inr = create_file(f, thdr.name, perm);
-		
-	}
-	if( inr < 0 )
-		goto error;
-
-	/* fill with new inode */
-	f->pinbr = f->inbr;
-	if( get_inode(f, inr) < 0 ){
-		errno = Eio;
-		goto error;
-	}
-	r->fid->qid = (Qid){inr, 0, 0};
-	if( S_ISDIR(getmode(f)) )
-		r->fid->qid.type |= QTDIR;
-	chat("f->qid=0x%8.8lux...", r->fid->qid.path);
-	rhdr.qid = r->fid->qid;
-error:
-	response(r);
-}
-static void
-rremove(Req *r)
-{
-	Xfile *f=xfile(r->fid, Asis);
-
-	chat("remove(fid=%d) ...", thdr.fid);
-
-	errno = 0;
-	if(!f){
-		errno = Eio;
-		goto error;
-	}
-
-	/* check permission here !!!!*/
-
-	unlink(f);
-
-error:
-	response(r);
-}
-
-Srv ext2srv = {
-	.destroyfid =	destroyfid,
-	.attach =	rattach,
-	.stat =		rstat,
-	.wstat =	rwstat,
-	.clone =	rclone,
-	.walk1 =	rwalk1,
-	.open =		ropen,
-	.read =		rread,
-	.write =	rwrite,
-	.create =	rcreate,
-	.remove =	rremove,
-};
--- a/sys/src/cmd/ext2srv/ext2srv.man
+++ /dev/null
@@ -1,110 +1,0 @@
-.TH EXT2SRV 4
-.SH NAME
-ext2srv \- ext2 file system
-.SH SYNOPSIS
-.B ext2srv
-[
-.B -vrs
-] [
-.B -f
-.I file
-] [
-.B -p
-.I passwd
-] [
-.B -g
-.I group
-] [
-.I service
-]
-.SH DESCRIPTION
-.I Ext2srv
-is a file server that interprets the Linux Second Extended File System.
-A single instance of
-.I ext2srv
-can provide access to multiple ext2 partitions simultaneously.
-.PP
-.I Ext2srv
-posts a file descriptor named
-.I service
-(default
-.BR ext2 )
-in the
-.B /srv
-directory.
-To access an ext2 file system on a device, use
-.B mount
-with the
-.I spec
-argument
-(see
-.IR bind (1))
-the name of the file holding the raw ext2 file system, typically the disk or partition.
-If
-.I spec
-is undefined in the
-.BR mount ,
-.I ext2srv
-will use
-.I file
-as the default name for the device holding the file system.
-.PP
-Normally
-.I ext2srv
-creates a pipe to act as the communications channel between
-itself and its clients.
-The
-.B -s
-flag instructs
-.I ext2srv
-to use its standard input and output instead.
-This flag also prevents the creation of an explicit service file in
-.BR /srv .
-.PP
-The
-.B -v
-flag causes verbose output for debugging, while
-the
-.B -r
-flag (recommended) makes the file system read-only.
-The optional
-.B -p
-and
-.B -g
-flags specify Unix-format password (respectively group) files
-that give the mapping between the numeric user- and group-ID
-numbers in the ext2 file system and the strings reported by Plan 9 status
-inquiries.
-.PP
-There is no authentication or permission checking.
-Anyone who can access the ext2 file system will have full access
-to all its files, including write access if
-.I ext2srv
-is not started with the
-.B -r
-flag, irrespective of file ownership and permission flags.
-.PP
-Some file system state is cached in memory, and may
-be flushed only when the file system is unmounted.
-Therefore if
-.I ext2srv
-is stopped or the machine is rebooted while an ext2 file system
-is still mounted,
-the superblock on the device will have been marked `not valid'
-(unless the
-.B -r
-flag was used),
-and a
-.I fsck
-will be required before that file system may be mounted again.
-.SH BUGS
-There is no authentication or permission checking.
-The implementation has not tracked any changes to the ext2
-specification since it was written.
-There may be other bugs.
-It is advisable to use
-.I ext2srv
-in read-only mode whenever possible.
-.SH AUTHOR
-Bodet Laurent ([email protected]),
-with later updates by Russ Cox and Richard Miller.
--- a/sys/src/cmd/ext2srv/ext2subs.c
+++ /dev/null
@@ -1,1870 +1,0 @@
-/*
- * ext2subs.c version 0.20
- * 
- * Some strategic functions come from linux/fs/ext2
- * kernel sources written by Remy Card.
- *
-*/
-
-#include <u.h>
-#include <libc.h>
-#include <bio.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-#define putext2(e)	putbuf((e).buf)
-#define dirtyext2(e)	dirtybuf((e).buf)
-
-static Intmap *uidmap, *gidmap;
-
-static int
-getnum(char *s, int *n)
-{
-	char *r;
-
-	*n = strtol(s, &r, 10);
-	return (r != s);
-}
-
-static Intmap*
-idfile(char *f)
-{
-	Biobuf *bin;
-	Intmap *map;
-	char *fields[3];
-	char *s;
-	int nf, id;
-
-	map = allocmap(0);
-	bin = Bopen(f, OREAD);
-	if (bin == 0)
-		return 0;
-	while ((s = Brdline(bin, '\n')) != 0) {
-		s[Blinelen(bin)-1] = '\0';
-		nf = getfields(s, fields, 3, 0, ":");
-		if (nf == 3 && getnum(fields[2], &id))
-			insertkey(map, id, strdup(fields[0]));
-	}
-	Bterm(bin);
-	return map;
-}
-
-void
-uidfile(char *f)
-{
-	uidmap = idfile(f);
-}
-
-void
-gidfile(char *f)
-{
-	gidmap = idfile(f);
-}
-
-static char*
-mapuid(int id)
-{
-	static char s[12];
-	char *p;
-
-	if (uidmap && (p = lookupkey(uidmap, id)) != 0)
-		return p;
-	sprint(s, "%d", id);
-	return s;
-}
-
-static char*
-mapgid(int id)
-{
-	static char s[12];
-	char *p;
-
-	if (gidmap && (p = lookupkey(gidmap, id)) != 0)
-		return p;
-	sprint(s, "%d", id);
-	return s;
-}
-
-int
-ext2fs(Xfs *xf)
-{
-	SuperBlock superblock;
-
-	/* get the super block */
-	seek(xf->dev, OFFSET_SUPER_BLOCK, 0);
-	if( sizeof(SuperBlock) != 
-				read(xf->dev, &superblock, sizeof(SuperBlock)) ){
-		chat("can't read super block %r...", xf->dev);
-		errno = Eformat;
-		return -1;
-	}
-	if( superblock.s_magic != EXT2_SUPER_MAGIC ){
-		chat("Bad super block...");
-		errno = Eformat;
-		return -1;
-	}
-	if( !(superblock.s_state & EXT2_VALID_FS) ){
-		chat("fs not checked...");
-		errno = Enotclean;
-		return -1;
-	}
-	
-	xf->block_size = EXT2_MIN_BLOCK_SIZE << superblock.s_log_block_size;
-	xf->desc_per_block = xf->block_size / sizeof (GroupDesc);
-	xf->inodes_per_group = superblock.s_inodes_per_group;
-	xf->inodes_per_block = xf->block_size / sizeof (Inode);
-	xf->addr_per_block = xf->block_size / sizeof (uint);
-	xf->blocks_per_group = superblock.s_blocks_per_group;
-
-	if( xf->block_size == OFFSET_SUPER_BLOCK )
-		xf->superaddr = 1, xf->superoff = 0, xf->grpaddr = 2;
-	else if( xf->block_size == 2*OFFSET_SUPER_BLOCK ||
-			xf->block_size == 4*OFFSET_SUPER_BLOCK )
-		xf->superaddr = 0, xf->superoff = OFFSET_SUPER_BLOCK, xf->grpaddr = 1;
-	else {
-		chat(" blocks of %d bytes are not supported...", xf->block_size);
-		errno = Eformat;
-		return -1;
-	}
-
-	chat("good super block...");
-
-	xf->ngroups = (superblock.s_blocks_count - 
-				superblock.s_first_data_block + 
-				superblock.s_blocks_per_group -1) / 
-				superblock.s_blocks_per_group;
-
-	superblock.s_state &= ~EXT2_VALID_FS;
-	superblock.s_mnt_count++;
-	seek(xf->dev, OFFSET_SUPER_BLOCK, 0);
-	if( !rdonly && sizeof(SuperBlock) != 
-				write(xf->dev, &superblock, sizeof(SuperBlock)) ){
-		chat("can't write super block...");
-		errno = Eio;
-		return -1;
-	}
-
-	return 0;
-}
-Ext2
-getext2(Xfs *xf, char type, int n)
-{
-	Iobuf *bd;
-	Ext2 e;
-
-	switch(type){
-	case EXT2_SUPER:
-		e.buf = getbuf(xf, xf->superaddr);
-		if( !e.buf ) goto error;
-		e.u.sb = (SuperBlock *)(e.buf->iobuf + xf->superoff);
-		e.type = EXT2_SUPER;
-		break;
-	case EXT2_DESC:
-		e.buf = getbuf(xf, DESC_ADDR(xf, n));
-		if( !e.buf ) goto error;
-		e.u.gd = DESC_OFFSET(xf, e.buf->iobuf, n);
-		e.type = EXT2_DESC;
-		break;
-	case EXT2_BBLOCK:
-		bd = getbuf(xf, DESC_ADDR(xf, n));
-		if( !bd ) goto error;
-		e.buf = getbuf(xf, DESC_OFFSET(xf, bd->iobuf, n)->bg_block_bitmap);
-		if( !e.buf ){
-			putbuf(bd);
-			goto error;
-		}
-		putbuf(bd);
-		e.u.bmp = (char *)e.buf->iobuf;
-		e.type = EXT2_BBLOCK;
-		break;
-	case EXT2_BINODE:
-		bd = getbuf(xf, DESC_ADDR(xf, n));
-		if( !bd ) goto error;
-		e.buf = getbuf(xf, DESC_OFFSET(xf, bd->iobuf, n)->bg_inode_bitmap);
-		if( !e.buf ){
-			putbuf(bd);
-			goto error;
-		}
-		putbuf(bd);
-		e.u.bmp = (char *)e.buf->iobuf;
-		e.type = EXT2_BINODE;
-		break;
-	default:
-		goto error;
-	}
-	return e;
-error:
-	panic("getext2");
-	return e;
-}
-int
-get_inode( Xfile *file, uint nr )
-{
-	unsigned long block_group, block;
-	Xfs *xf = file->xf;
-	Ext2 ed, es;
-
-	es = getext2(xf, EXT2_SUPER, 0);
-	if(nr > es.u.sb->s_inodes_count ){
-		chat("inode number %d is too big...", nr);
-		putext2(es);
-		errno = Eio;
-		return -1;
-	}
-	putext2(es);
-	block_group = (nr - 1) / xf->inodes_per_group;
-	if( block_group >= xf->ngroups ){
-		chat("block group (%d) > groups count...", block_group);
-		errno = Eio;
-		return -1;
-	}
-	ed = getext2(xf, EXT2_DESC, block_group);
-	block = ed.u.gd->bg_inode_table + (((nr-1) % xf->inodes_per_group) / 
-			xf->inodes_per_block);
-	putext2(ed);
-
-	file->bufoffset = (nr-1) % xf->inodes_per_block;
-	file->inbr = nr;
-	file->bufaddr= block;
-
-	return 1;
-}
-int
-get_file( Xfile *f, char *name)
-{	
-	uint offset, nr, i;
-	Xfs *xf = f->xf;
-	Inode *inode;
-	int nblock;
-	DirEntry *dir;
-	Iobuf *buf, *ibuf;
-	
-	if( !S_ISDIR(getmode(f)) )
-		return -1;
-	ibuf = getbuf(xf, f->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-	nblock = (inode->i_blocks * 512) / xf->block_size;
-
-	for(i=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){
-		buf = getbuf(xf, inode->i_block[i]);
-		if( !buf ){
-			putbuf(ibuf);
-			return -1;
-		}
-		for(offset=0 ; offset < xf->block_size ;  ){
-			dir = (DirEntry *)(buf->iobuf + offset);
-			if( dir->name_len==strlen(name) && 
-					!strncmp(name, dir->name, dir->name_len) ){
-				nr = dir->inode;
-				putbuf(buf);
-				putbuf(ibuf);
-				return nr;
-			}
-			offset += dir->rec_len;
-		}
-		putbuf(buf);
-
-	}
-	putbuf(ibuf);
-	errno = Enonexist;
-	return -1;
-}
-char *
-getname(Xfile *f, char *str)
-{
-	Xfile ft;
-	int offset, i, len;
-	Xfs *xf = f->xf;
-	Inode *inode;
-	int nblock;
-	DirEntry *dir;
-	Iobuf *buf, *ibuf;
-
-	ft = *f;
-	if( get_inode(&ft, f->pinbr) < 0 )
-		return 0;
-	if( !S_ISDIR(getmode(&ft)) )
-		return 0;
-	ibuf = getbuf(xf, ft.bufaddr);
-	if( !ibuf )
-		return 0;
-	inode = ((Inode *)ibuf->iobuf) + ft.bufoffset;
-	nblock = (inode->i_blocks * 512) / xf->block_size;
-
-	for(i=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){
-		buf = getbuf(xf, inode->i_block[i]);
-		if( !buf ){
-			putbuf(ibuf);
-			return 0;
-		}
-		for(offset=0 ; offset < xf->block_size ;  ){
-			dir = (DirEntry *)(buf->iobuf + offset);
-			if( f->inbr == dir->inode ){
-				len = (dir->name_len < EXT2_NAME_LEN) ? dir->name_len : EXT2_NAME_LEN;
-				if (str == 0)
-					str = malloc(len+1);
-				strncpy(str, dir->name, len);   
-				str[len] = 0;
-				putbuf(buf);
-				putbuf(ibuf);
-				return str;
-			}
-			offset += dir->rec_len;
-		}
-		putbuf(buf);
-	}
-	putbuf(ibuf);
-	errno = Enonexist;
-	return 0;
-}
-void
-dostat(Qid qid, Xfile *f, Dir *dir )
-{
-	Inode *inode;
-	Iobuf *ibuf;
-	char *name;
-
-	memset(dir, 0, sizeof(Dir));
-
-	if(  f->inbr == EXT2_ROOT_INODE ){
-		dir->name = estrdup9p("/");
-		dir->qid = (Qid){0,0,QTDIR};
-		dir->mode = DMDIR | 0777;
-	}else{
-		ibuf = getbuf(f->xf, f->bufaddr);
-		if( !ibuf )
-			return;
-		inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-		dir->length = inode->i_size;
-		dir->atime = inode->i_atime;
-		dir->mtime = inode->i_mtime;
-		putbuf(ibuf);
-		name = getname(f, 0);
-		dir->name = name;
-		dir->uid = estrdup9p(mapuid(inode->i_uid));
-		dir->gid = estrdup9p(mapgid(inode->i_gid));
-		dir->qid = qid;
-		dir->mode = getmode(f);
-		if( qid.type & QTDIR )
-			dir->mode |= DMDIR;
-	}
-
-}
-int 
-dowstat(Xfile *f, Dir *stat)
-{
-	Xfs *xf = f->xf;
-	Inode *inode;
-	Xfile fdir;
-	Iobuf *ibuf;
-	char name[EXT2_NAME_LEN+1];
-
-	/* change name */
-	getname(f, name);
-	if( stat->name && stat->name[0] != 0 && strcmp(name, stat->name) ){
-
-		/* get dir */
-		fdir = *f;
-		if( get_inode(&fdir, f->pinbr) < 0 ){
-			chat("can't get inode %d...", f->pinbr);
-			return -1;
-		}
-	
-		ibuf = getbuf(xf, fdir.bufaddr);
-		if( !ibuf )
-			return -1;
-		inode = ((Inode *)ibuf->iobuf) +fdir.bufoffset;
-
-		/* Clean old dir entry */
-		if( delete_entry(xf, inode, f->inbr) < 0 ){
-			chat("delete entry failed...");
-			putbuf(ibuf);	
-			return -1;
-		}
-		putbuf(ibuf);
-
-		/* add the new entry */
-		if( add_entry(&fdir, stat->name, f->inbr) < 0 ){
-			chat("add entry failed...");	
-			return -1;
-		}
-	
-	}
-
-	ibuf = getbuf(xf, f->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
-	if (stat->mode != ~0)
-	if( (getmode(f) & 0777) != (stat->mode & 0777) ){
-		inode->i_mode = (getmode(f) & ~0777) | (stat->mode & 0777);
-		dirtybuf(ibuf);
-	}
-	if (stat->mtime != ~0)
-	if(  inode->i_mtime != stat->mtime ){
-		inode->i_mtime = stat->mtime;
-		dirtybuf(ibuf);
-	}
-
-	putbuf(ibuf);
-
-	return 1;
-}
-long
-readfile(Xfile *f, void *vbuf, vlong offset, long count)
-{
-	Xfs *xf = f->xf;
-	Inode *inode;
-	Iobuf *buffer, *ibuf;
-	long rcount;
-	int len, o, cur_block, baddr;
-	uchar *buf;
-
-	buf = vbuf;
-	
-	ibuf = getbuf(xf, f->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
-	if( offset >= inode->i_size ){
-		putbuf(ibuf);
-		return 0;
-	}
-	if( offset + count > inode->i_size )
-		count = inode->i_size - offset;
-
-	/* fast link */
-	if( S_ISLNK(getmode(f)) && (inode->i_size <= EXT2_N_BLOCKS<<2) ){
-		memcpy(&buf[0], ((char *)inode->i_block)+offset, count);
-		putbuf(ibuf);	
-		return count;
-	}
-	chat("read block [ ");
-	cur_block = offset / xf->block_size;
-	o = offset % xf->block_size;
-	rcount = 0;
-	while( count > 0 ){
-		baddr = bmap(f, cur_block++);
-		if( !baddr ){
-			putbuf(ibuf);
-			return -1;
-		}
-		buffer = getbuf(xf, baddr);
-		if( !buffer ){
-			putbuf(ibuf);
-			return -1;
-		}
-		chat("%d ", baddr);
-		len = xf->block_size - o;
-		if( len > count )
-			len = count;
-		memcpy(&buf[rcount], &buffer->iobuf[o], len);
-		rcount += len;
-		count -= len;
-		o = 0;
-		putbuf(buffer);
-	}
-	chat("] ...");
-	inode->i_atime = time(0);
-	dirtybuf(ibuf);
-	putbuf(ibuf);
-	return rcount;
-}
-long
-readdir(Xfile *f, void *vbuf, vlong offset, long count)
-{
-	int off, i, len;
-	long rcount;
-	Xfs *xf = f->xf;
-	Inode *inode, *tinode;
-	int nblock;
-	DirEntry *edir;
-	Iobuf *buffer, *ibuf, *tbuf;
-	Dir pdir;
-	Xfile ft;
-	uchar *buf;
-	char name[EXT2_NAME_LEN+1];
-	unsigned int dirlen;
-	int index;
-
-	buf = vbuf;
-	if (offset == 0)
-		f->dirindex = 0;
-	
-	if( !S_ISDIR(getmode(f)) )
-		return -1;
-
-	ibuf = getbuf(xf, f->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-	nblock = (inode->i_blocks * 512) / xf->block_size;
-	ft = *f;
-	chat("read block [ ");
-	index = 0;
-	for(i=0, rcount=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){
-		
-		buffer = getbuf(xf, inode->i_block[i]);
-		if( !buffer ){
-			putbuf(ibuf);
-			return -1;
-		}
-		chat("%d, ", buffer->addr);
-		for(off=0 ; off < xf->block_size ;  ){
-		
-			edir = (DirEntry *)(buffer->iobuf + off);	
-			off += edir->rec_len;
-			if( (edir->name[0] == '.' ) && (edir->name_len == 1))
-				continue;
-			if(edir->name[0] == '.' && edir->name[1] == '.' && 
-										edir->name_len == 2)
-				continue;
-			if( edir->inode == 0 ) /* for lost+found dir ... */
-				continue;
-			if( index++ < f->dirindex )
-				continue;
-			
-			if( get_inode(&ft, edir->inode) < 0 ){
-				chat("can't find ino no %d ] ...", edir->inode);
-error:			putbuf(buffer);
-				putbuf(ibuf);
-				return -1;
-			}
-			tbuf = getbuf(xf, ft.bufaddr);
-			if( !tbuf )
-				goto error;
-			tinode = ((Inode *)tbuf->iobuf) + ft.bufoffset;
-
-			memset(&pdir, 0, sizeof(Dir));			
-			
-			/* fill plan9 dir struct */			
-			pdir.name = name;
-			len = (edir->name_len < EXT2_NAME_LEN) ? edir->name_len : EXT2_NAME_LEN;
-			strncpy(pdir.name, edir->name, len);   
-			pdir.name[len] = 0;
-// chat("name %s len %d\n", pdir.name, edir->name_len);
-			pdir.uid = mapuid(tinode->i_uid);
-			pdir.gid = mapgid(tinode->i_gid);
-			pdir.qid.path = edir->inode;
-			pdir.mode = tinode->i_mode;
-			if( edir->inode == EXT2_ROOT_INODE )
-				pdir.qid.path = f->xf->rootqid.path;
-			else if( S_ISDIR( tinode->i_mode) )
-				pdir.qid.type |= QTDIR;
-			if( pdir.qid.type & QTDIR )
-				pdir.mode |= DMDIR;
-			pdir.length = tinode->i_size;
-			pdir.atime = tinode->i_atime;
-			pdir.mtime = tinode->i_mtime;
-		
-			putbuf(tbuf);
-
-			dirlen = convD2M(&pdir, &buf[rcount], count-rcount);
-			if ( dirlen <= BIT16SZ ) {
-				chat("] ...");
-				putbuf(buffer);
-				putbuf(ibuf);
-				return rcount;
-			}
-			rcount += dirlen;
-			f->dirindex++;
-
-		}
-		putbuf(buffer);
-	}
-	chat("] ...");
-	putbuf(ibuf);
-	return rcount;
-}
-int
-bmap( Xfile *f, int block )
-{
-	Xfs *xf = f->xf;
-	Inode *inode;
-	Iobuf *buf, *ibuf;
-	int addr;
-	int addr_per_block = xf->addr_per_block;
-	int addr_per_block_bits = ffz(~addr_per_block);
-	
-	if(block < 0) {
-		chat("bmap() block < 0 ...");
-		return 0;
-	}
-	if(block >= EXT2_NDIR_BLOCKS + addr_per_block +
-		(1 << (addr_per_block_bits * 2)) +
-		((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) {
-		chat("bmap() block > big...");
-		return 0;
-	}
-
-	ibuf = getbuf(xf, f->bufaddr);
-	if( !ibuf )
-		return 0;
-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
-	/* direct blocks */
-	if(block < EXT2_NDIR_BLOCKS){
-		putbuf(ibuf);
-		return inode->i_block[block];
-	}
-	block -= EXT2_NDIR_BLOCKS;
-	
-	/* indirect blocks*/
-	if(block < addr_per_block) {
-		addr = inode->i_block[EXT2_IND_BLOCK];
-		if (!addr) goto error;
-		buf = getbuf(xf, addr);
-		if( !buf ) goto error;
-		addr = *(((uint *)buf->iobuf) + block);
-		putbuf(buf);
-		putbuf(ibuf);
-		return addr;	
-	}
-	block -= addr_per_block;
-	
-	/* double indirect blocks */
-	if(block < (1 << (addr_per_block_bits * 2))) {
-		addr = inode->i_block[EXT2_DIND_BLOCK];
-		if (!addr) goto error;
-		buf = getbuf(xf, addr);
-		if( !buf ) goto error;
-		addr = *(((uint *)buf->iobuf) + (block >> addr_per_block_bits));
-		putbuf(buf);
-		buf = getbuf(xf, addr);
-		if( !buf ) goto error;
-		addr = *(((uint *)buf->iobuf) + (block & (addr_per_block - 1)));
-		putbuf(buf);
-		putbuf(ibuf);
-		return addr;
-	}
-	block -= (1 << (addr_per_block_bits * 2));
-
-	/* triple indirect blocks */
-	addr = inode->i_block[EXT2_TIND_BLOCK];
-	if(!addr) goto error;
-	buf = getbuf(xf, addr);
-	if( !buf ) goto error;
-	addr = *(((uint *)buf->iobuf) + (block >> (addr_per_block_bits * 2)));
-	putbuf(buf);
-	if(!addr) goto error;
-	buf = getbuf(xf, addr);
-	if( !buf ) goto error;
-	addr = *(((uint *)buf->iobuf) +
-			((block >> addr_per_block_bits) & (addr_per_block - 1)));
-	putbuf(buf);
-	if(!addr) goto error;
-	buf = getbuf(xf, addr);
-	if( !buf ) goto error;
-	addr = *(((uint *)buf->iobuf) + (block & (addr_per_block - 1)));
-	putbuf(buf);
-	putbuf(ibuf);
-	return addr;
-error:
-	putbuf(ibuf);
-	return 0;
-}
-long
-writefile(Xfile *f, void *vbuf, vlong offset, long count)
-{
-	Xfs *xf = f->xf;
-	Inode *inode;
-	Iobuf *buffer, *ibuf;
-	long w;
-	int len, o, cur_block, baddr;
-	char *buf;
-
-	buf = vbuf;
-
-	ibuf = getbuf(xf, f->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
-	chat("write block [ ");
-	cur_block = offset / xf->block_size;
-	o = offset % xf->block_size;
-	w = 0;
-	while( count > 0 ){
-		baddr = getblk(f, cur_block++);
-		if( baddr <= 0 )
-			goto end;
-		buffer = getbuf(xf, baddr);
-		if( !buffer )
-			goto end;
-		chat("%d ", baddr);
-		len = xf->block_size - o;
-		if( len > count )
-			len = count;
-		memcpy(&buffer->iobuf[o], &buf[w], len);
-		dirtybuf(buffer);
-		w += len;
-		count -= len;
-		o = 0;
-		putbuf(buffer);
-	}
-end:
-	if( inode->i_size < offset + w )
-		inode->i_size = offset + w;
-	inode->i_atime = inode->i_mtime = time(0);
-	dirtybuf(ibuf);
-	putbuf(ibuf);
-	chat("]...");
-	if( errno )
-		return -1;
-	return w;
-}
-int 
-new_block( Xfile *f, int goal )
-{
-	Xfs *xf= f->xf;
-	int group, block, baddr, k, redo;
-	ulong lmap;
-	char *p, *r;
-	Iobuf *buf;
-	Ext2 ed, es, eb;
-	
-	es = getext2(xf, EXT2_SUPER, 0);
-	redo = 0;
- 
-repeat:
-	
-	if( goal < es.u.sb->s_first_data_block || goal >= es.u.sb->s_blocks_count )
-		goal = es.u.sb->s_first_data_block;
-	group = (goal - es.u.sb->s_first_data_block) / xf->blocks_per_group;
-
-	ed = getext2(xf, EXT2_DESC, group);
-	eb = getext2(xf, EXT2_BBLOCK, group);
-
-	/* 
-	 * First, test if goal block is free
-	 */
-	if( ed.u.gd->bg_free_blocks_count > 0 ){
-		block = (goal - es.u.sb->s_first_data_block) % xf->blocks_per_group;
-		
-		if( !test_bit(block, eb.u.bmp) )
-			goto got_block;
-		
-		if( block ){
-			/*
-			 * goal wasn't free ; search foward for a free 
-			 * block within the next 32 blocks
-			*/
-			
-			lmap = (((ulong *)eb.u.bmp)[block>>5]) >>
-					((block & 31) + 1);
-			if( block < xf->blocks_per_group - 32 )
-				lmap |= (((ulong *)eb.u.bmp)[(block>>5)+1]) <<
-					( 31-(block & 31) );
-			else
-				lmap |= 0xffffffff << ( 31-(block & 31) );
-
-			if( lmap != 0xffffffffl ){
-				k = ffz(lmap) + 1;
-				if( (block + k) < xf->blocks_per_group ){
-					block += k;
-					goto got_block;
-				}
-			}			
-		}
-		/*
-		 * Search in the remaider of the group
-		*/
-		p = eb.u.bmp + (block>>3);
-		r = memscan(p, 0, (xf->blocks_per_group - block + 7) >>3);
-		k = ( r - eb.u.bmp )<<3;
-		if( k < xf->blocks_per_group ){
-			block = k;
-			goto search_back;
-		}
-		k = find_next_zero_bit((unsigned long *)eb.u.bmp, 
-						xf->blocks_per_group>>3, block);
-		if( k < xf->blocks_per_group ){
-			block = k;
-			goto got_block;
-		}
-	}
-
-	/*
-	 * Search the rest of groups
-	*/
-	putext2(ed); putext2(eb);
-	for(k=0 ; k < xf->ngroups ; k++){
-		group++;
-		if( group >= xf->ngroups )
-			group = 0;
-		ed = getext2(xf, EXT2_DESC, group);
-		if( ed.u.gd->bg_free_blocks_count > 0 )
-			break;
-		putext2(ed);
-	}
-	if( redo && group == xf->ngroups-1 ){
-		putext2(ed);
-		goto full;
-	}
-	if( k >=xf->ngroups ){
-		/*
-		 * All groups are full or
-		 * we have retry (because the last block) and all other
-		 * groups are also full.
-		*/
-full:	
-		chat("no free blocks ...");
-	 	putext2(es); 
-		errno = Enospace;
-		return 0;
-	}
-	eb = getext2(xf, EXT2_BBLOCK, group);
-	r = memscan(eb.u.bmp,  0, xf->blocks_per_group>>3);
-	block = (r - eb.u.bmp) <<3;
-	if( block < xf->blocks_per_group )
-		goto search_back;
-	else
-		block = find_first_zero_bit((ulong *)eb.u.bmp,
-								xf->blocks_per_group>>3);
-	if( block >= xf->blocks_per_group ){
-		chat("Free block count courupted for block group %d...", group);
-		putext2(ed); putext2(eb); putext2(es);
-		errno = Ecorrupt;
-		return 0;
-	}
-
-
-search_back:
-	/*
-	 * A free byte was found in the block. Now search backwards up
-	 * to 7 bits to find the start of this group of free block.
-	*/
-	for(k=0 ; k < 7 && block > 0 && 
-		!test_bit(block-1, eb.u.bmp) ; k++, block--);
-
-got_block:
-
-	baddr = block + (group * xf->blocks_per_group) + 
-				es.u.sb->s_first_data_block;
-	
-	if( baddr == ed.u.gd->bg_block_bitmap ||
-	     baddr == ed.u.gd->bg_inode_bitmap ){
-		chat("Allocating block in system zone...");
-		putext2(ed); putext2(eb); putext2(es);
-		errno = Eintern;
-		return 0;
-	}
-
-	if( set_bit(block, eb.u.bmp) ){
-		chat("bit already set (%d)...", block);
-		putext2(ed); putext2(eb); putext2(es);
-		errno = Ecorrupt;
-		return 0;
-	}
-	dirtyext2(eb);
-	
-	if( baddr >= es.u.sb->s_blocks_count ){
-		chat("block >= blocks count...");
-		errno = Eintern;
-error:
-		clear_bit(block, eb.u.bmp);
-		putext2(eb); putext2(ed); putext2(es);
-		return 0;
-	}
-	
-	buf = getbuf(xf, baddr);
-	if( !buf ){
-		if( !redo ){
-			/*
-			 * It's perhaps the last block of the disk and 
-			 * it can't be acceded because the last sector.
-			 * Therefore, we try one more time with goal at 0
-			 * to force scanning all groups.
-			*/
-			clear_bit(block, eb.u.bmp);
-			putext2(eb); putext2(ed);
-			goal = 0; errno = 0; redo++;
-			goto repeat;
-		}
-		goto error;
-	}
-	memset(&buf->iobuf[0], 0, xf->block_size);
-	dirtybuf(buf);
-	putbuf(buf);
-
-	es.u.sb->s_free_blocks_count--;
-	dirtyext2(es);
-	ed.u.gd->bg_free_blocks_count--;
-	dirtyext2(ed);
-
-	putext2(eb);
-	putext2(ed);
-	putext2(es);
-	chat("new ");
-	return baddr;
-}
-int
-getblk(Xfile *f, int block)
-{
-	Xfs *xf = f->xf;
-	int baddr;
-	int addr_per_block = xf->addr_per_block;
-
-	if (block < 0) {
-		chat("getblk() block < 0 ...");
-		return 0;
-	}
-	if(block > EXT2_NDIR_BLOCKS + addr_per_block +
-			addr_per_block * addr_per_block +
-			addr_per_block * addr_per_block * addr_per_block ){
-		chat("getblk() block > big...");
-		errno = Eintern;
-		return 0;
-	}
-	if( block < EXT2_NDIR_BLOCKS )
-		return inode_getblk(f, block);
-	block -= EXT2_NDIR_BLOCKS;	
-	if( block < addr_per_block ){
-		baddr = inode_getblk(f, EXT2_IND_BLOCK);
-		baddr = block_getblk(f, baddr, block);
-		return baddr;
-	}
-	block -= addr_per_block;
-	if( block < addr_per_block * addr_per_block  ){
-		baddr = inode_getblk(f, EXT2_DIND_BLOCK);
-		baddr = block_getblk(f, baddr, block / addr_per_block);
-		baddr = block_getblk(f, baddr, block & ( addr_per_block-1));
-		return baddr; 
-	}
-	block -= addr_per_block * addr_per_block;
-	baddr = inode_getblk(f, EXT2_TIND_BLOCK);
-	baddr = block_getblk(f, baddr, block / (addr_per_block * addr_per_block));
-	baddr = block_getblk(f, baddr, (block / addr_per_block) & ( addr_per_block-1));
-	return block_getblk(f, baddr, block & ( addr_per_block-1));
-}
-int
-block_getblk(Xfile *f, int rb, int nr)
-{
-	Xfs *xf = f->xf;
-	Inode *inode;
-	int tmp, goal = 0;
-	int blocks = xf->block_size / 512;
-	Iobuf *buf, *ibuf;
-	uint *p;
-	Ext2 es;
-
-	if( !rb )
-		return 0;
-
-	buf = getbuf(xf, rb);
-	if( !buf )
-		return 0;
-	p = (uint *)(buf->iobuf) + nr;
-	if( *p ){
-		tmp = *p;
-		putbuf(buf);
-		return tmp;
-	}
-
-	for(tmp=nr - 1 ; tmp >= 0 ; tmp--){
-		if( ((uint *)(buf->iobuf))[tmp] ){
-			goal = ((uint *)(buf->iobuf))[tmp];
-			break;
-		}
-	}
-	if( !goal ){
-		es = getext2(xf, EXT2_SUPER, 0);
-		goal = (((f->inbr -1) / xf->inodes_per_group) *
-				xf->blocks_per_group) +
-				es.u.sb->s_first_data_block;
-		putext2(es);
-	}
-	
-	tmp = new_block(f, goal);
-	if( !tmp ){
-		putbuf(buf);
-		return 0;
-	}
-
-	*p = tmp;
-	dirtybuf(buf);
-	putbuf(buf);
-	
-	ibuf = getbuf(xf, f->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-	inode->i_blocks += blocks;
-	dirtybuf(ibuf);
-	putbuf(ibuf);
-
-	return tmp;
-}
-int 
-inode_getblk(Xfile *f, int block)
-{
-	Xfs *xf = f->xf;
-	Inode *inode;
-	Iobuf *ibuf;
-	int tmp, goal = 0;
-	int blocks = xf->block_size / 512;
-	Ext2 es;
-
-	ibuf = getbuf(xf, f->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
-
-	if( inode->i_block[block] ){
-		putbuf(ibuf);
-		return inode->i_block[block];
-	}
-
-	for(tmp=block - 1 ; tmp >= 0 ; tmp--){
-		if( inode->i_block[tmp] ){
-			goal = inode->i_block[tmp];
-			break;
-		}
-	}
-	if( !goal ){
-		es = getext2(xf, EXT2_SUPER, 0);
-		goal = (((f->inbr -1) / xf->inodes_per_group) *
-				xf->blocks_per_group) +
-				es.u.sb->s_first_data_block;
-		putext2(es);
-	}
-
-	tmp = new_block(f, goal);
-	if( !tmp ){
-		putbuf(ibuf);
-		return 0;
-	}
-
-	inode->i_block[block] = tmp;
-	inode->i_blocks += blocks;
-	dirtybuf(ibuf);
-	putbuf(ibuf);
-
-	return tmp;
-}
-int 
-new_inode(Xfile *f, int mode)
-{
-	Xfs *xf = f->xf;
-	Inode *inode, *finode;
-	Iobuf *buf, *ibuf;
-	int ave,group, i, j;
-	Ext2 ed, es, eb;
-
-	group = -1;
-
-	es = getext2(xf, EXT2_SUPER, 0);
-
-	if( S_ISDIR(mode) ){	/* create directory inode */
-		ave = es.u.sb->s_free_inodes_count / xf->ngroups;
-		for(i=0 ; i < xf->ngroups ; i++){
-			ed = getext2(xf, EXT2_DESC, i);
-			if( ed.u.gd->bg_free_inodes_count &&
-					ed.u.gd->bg_free_inodes_count >= ave ){
-				if( group<0 || ed.u.gd->bg_free_inodes_count >
-								ed.u.gd->bg_free_inodes_count )
-					group = i;
-			}
-			putext2(ed);
-		}
-
-	}else{		/* create file inode */
-		/* Try to put inode in its parent directory */
-		i = (f->inbr -1) / xf->inodes_per_group;
-		ed = getext2(xf, EXT2_DESC, i);
-		if( ed.u.gd->bg_free_inodes_count ){
-			group = i;
-			putext2(ed);
-		}else{
-			/*
-			 * Use a quadratic hash to find a group whith
-			 * a free inode
-			 */
-			putext2(ed);
-			for( j=1 ; j < xf->ngroups ; j <<= 1){
-				i += j;
-				if( i >= xf->ngroups )
-					i -= xf->ngroups;
-				ed = getext2(xf, EXT2_DESC, i);
-				if( ed.u.gd->bg_free_inodes_count ){
-					group = i;
-					putext2(ed);
-					break;
-				}
-				putext2(ed);
-			}
-		}
-		if( group < 0 ){
-			/* try a linear search */
-			i = ((f->inbr -1) / xf->inodes_per_group) + 1;
-			for(j=2 ; j < xf->ngroups ; j++){
-				if( ++i >= xf->ngroups )
-					i = 0;
-				ed = getext2(xf, EXT2_DESC, i);
-				if( ed.u.gd->bg_free_inodes_count ){
-					group = i;
-					putext2(ed);
-					break;
-				}
-				putext2(ed);
-			}
-		}
-
-	}
-	if( group < 0 ){
-		chat("group < 0...");
-		putext2(es);
-		return 0;
-	}
-	ed = getext2(xf, EXT2_DESC, group);
-	eb = getext2(xf, EXT2_BINODE, group);
-	if( (j = find_first_zero_bit(eb.u.bmp, 
-			xf->inodes_per_group>>3)) < xf->inodes_per_group){
-		if( set_bit(j, eb.u.bmp) ){
-			chat("inode %d of group %d is already allocated...", j, group);
-			putext2(ed); putext2(eb); putext2(es);
-			errno = Ecorrupt;
-			return 0;
-		}
-		dirtyext2(eb);
-	}else if( ed.u.gd->bg_free_inodes_count != 0 ){
-		chat("free inodes count corrupted for group %d...", group);
-		putext2(ed); putext2(eb); putext2(es);
-		errno = Ecorrupt;
-		return 0;
-	}
-	i = j;
-	j += group * xf->inodes_per_group + 1;
-	if( j < EXT2_FIRST_INO || j >= es.u.sb->s_inodes_count ){
-		chat("reserved inode or inode > inodes count...");
-		errno = Ecorrupt;
-error:
-		clear_bit(i, eb.u.bmp);
-		putext2(eb); putext2(ed); putext2(es);
-		return 0;
-	}
-	
-	buf = getbuf(xf, ed.u.gd->bg_inode_table +
-			(((j-1) % xf->inodes_per_group) / 
-			xf->inodes_per_block));
-	if( !buf )
-		goto error;
-	inode = ((struct Inode *) buf->iobuf) + 
-		((j-1) % xf->inodes_per_block);
-	memset(inode, 0, sizeof(Inode));
-	inode->i_mode = mode;
-	inode->i_links_count = 1;
-	inode->i_uid = DEFAULT_UID;
-	inode->i_gid = DEFAULT_GID;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = time(0);
-	dirtybuf(buf);
-
-	ibuf = getbuf(xf, f->bufaddr);
-	if( !ibuf ){
-		putbuf(buf);
-		goto error;
-	}
-	finode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-	inode->i_flags = finode->i_flags;
-	inode->i_uid = finode->i_uid;
-	inode->i_gid = finode->i_gid;
-	dirtybuf(ibuf);
-	putbuf(ibuf);
-
-	putbuf(buf);
-
-	ed.u.gd->bg_free_inodes_count--;
-	if( S_ISDIR(mode) )
-		ed.u.gd->bg_used_dirs_count++;
-	dirtyext2(ed);
-
-	es.u.sb->s_free_inodes_count--;
-	dirtyext2(es);
-
-	putext2(eb);
-	putext2(ed);
-	putext2(es);
-
-	return j;
-}
-int
-create_file(Xfile *fdir, char *name, int mode)
-{
-	int inr;
-
-	inr = new_inode(fdir, mode);
-	if( !inr ){
-		chat("create one new inode failed...");
-		return -1;
-	}
-	if( add_entry(fdir, name, inr) < 0 ){
-		chat("add entry failed...");	
-		free_inode(fdir->xf, inr);
-		return -1;
-	}
-
-	return inr;
-}
-void
-free_inode( Xfs *xf, int inr)
-{
-	Inode *inode;
-	ulong b, bg;
-	Iobuf *buf;
-	Ext2 ed, es, eb;
-
-	bg = (inr -1) / xf->inodes_per_group;
-	b = (inr -1) % xf->inodes_per_group;
-
-	ed = getext2(xf, EXT2_DESC, bg);
-	buf = getbuf(xf, ed.u.gd->bg_inode_table +
-			(b / xf->inodes_per_block));
-	if( !buf ){
-		putext2(ed);
-		return;
-	}
-	inode = ((struct Inode *) buf->iobuf) + 
-		((inr-1) % xf->inodes_per_block);
-
-	if( S_ISDIR(inode->i_mode) )
-		ed.u.gd->bg_used_dirs_count--;
-	memset(inode, 0, sizeof(Inode));
-	inode->i_dtime = time(0);
-	dirtybuf(buf);
-	putbuf(buf);
-
-	ed.u.gd->bg_free_inodes_count++;
-	dirtyext2(ed);
-	putext2(ed);
-
-	eb = getext2(xf, EXT2_BINODE, bg);
-	clear_bit(b, eb.u.bmp);
-	dirtyext2(eb);
-	putext2(eb);
-	
-	es = getext2(xf, EXT2_SUPER, 0);
-	es.u.sb->s_free_inodes_count++;
-	dirtyext2(es); putext2(es);
-}
-int
-create_dir(Xfile *fdir, char *name, int mode)
-{
-	Xfs *xf = fdir->xf;
-	DirEntry *de;
-	Inode *inode;
-	Iobuf *buf, *ibuf;
-	Xfile tf;
-	int inr, baddr;
-
-	inr = new_inode(fdir, mode);
-	if( inr == 0 ){
-		chat("create one new inode failed...");
-		return -1;
-	}
-	if( add_entry(fdir, name, inr) < 0 ){
-		chat("add entry failed...");
-		free_inode(fdir->xf, inr);
-		return -1;
-	}
-
-	/* create the empty dir */
-
-	tf = *fdir;
-	if( get_inode(&tf, inr) < 0 ){
-		chat("can't get inode %d...", inr);
-		free_inode(fdir->xf, inr);
-		return -1;
-	}
-
-	ibuf = getbuf(xf, tf.bufaddr);
-	if( !ibuf ){
-		free_inode(fdir->xf, inr);
-		return -1;
-	}
-	inode = ((Inode *)ibuf->iobuf) + tf.bufoffset;
-
-	
-	baddr = inode_getblk(&tf, 0);
-	if( !baddr ){
-		putbuf(ibuf);
-		ibuf = getbuf(xf, fdir->bufaddr);
-		if( !ibuf ){
-			free_inode(fdir->xf, inr);
-			return -1;
-		}
-		inode = ((Inode *)ibuf->iobuf) + fdir->bufoffset;
-		delete_entry(fdir->xf, inode, inr);
-		putbuf(ibuf);
-		free_inode(fdir->xf, inr);
-		return -1;
-	}	
-	
-	inode->i_size = xf->block_size;	
-	buf = getbuf(xf, baddr);
-	
-	de = (DirEntry *)buf->iobuf;
-	de->inode = inr;
-	de->name_len = 1;
-	de->rec_len = DIR_REC_LEN(de->name_len);
-	strcpy(de->name, ".");
-	
-	de = (DirEntry *)( (char *)de + de->rec_len);
-	de->inode = fdir->inbr;
-	de->name_len = 2;
-	de->rec_len = xf->block_size - DIR_REC_LEN(1);
-	strcpy(de->name, "..");
-	
-	dirtybuf(buf);
-	putbuf(buf);
-	
-	inode->i_links_count = 2;
-	dirtybuf(ibuf);
-	putbuf(ibuf);
-	
-	ibuf = getbuf(xf, fdir->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + fdir->bufoffset;
-
-	inode->i_links_count++;
-
-	dirtybuf(ibuf);
-	putbuf(ibuf);
-
-	return inr;
-}
-int
-add_entry(Xfile *f, char *name, int inr)
-{
-	Xfs *xf = f->xf;
-	DirEntry *de, *de1;
-	int offset, baddr;
-	int rec_len, cur_block;
-	int namelen = strlen(name);
-	Inode *inode;
-	Iobuf *buf, *ibuf;
-
-	ibuf = getbuf(xf, f->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
-	if( inode->i_size == 0 ){
-		chat("add_entry() no entry !!!...");
-		putbuf(ibuf);
-		return -1;
-	}
-	cur_block = offset = 0;
-	rec_len = DIR_REC_LEN(namelen);
-	buf = getbuf(xf, inode->i_block[cur_block++]);
-	if( !buf ){
-		putbuf(ibuf);
-		return -1;
-	}
-	de = (DirEntry *)buf->iobuf;
-	
-	for(;;){
-		if( ((char *)de) >= (xf->block_size + buf->iobuf) ){
-			putbuf(buf);
-			if( cur_block >= EXT2_NDIR_BLOCKS ){
-				errno = Enospace;
-				putbuf(ibuf);
-				return -1;
-			}
-			if( (baddr = inode_getblk(f, cur_block++)) == 0 ){
-				putbuf(ibuf);
-				return -1;
-			}
-			buf = getbuf(xf, baddr);
-			if( !buf ){
-				putbuf(ibuf);
-				return -1;
-			}
-			if( inode->i_size <= offset ){
-				de  = (DirEntry *)buf->iobuf;
-				de->inode = 0;
-				de->rec_len = xf->block_size;
-				dirtybuf(buf);
-				inode->i_size = offset + xf->block_size;
-				dirtybuf(ibuf);
-			}else{
-				de = (DirEntry *)buf->iobuf;
-			}
-		}
-		if( de->inode != 0 && de->name_len == namelen &&
-				!strncmp(name, de->name, namelen) ){
-			errno = Eexist;
-			putbuf(ibuf); putbuf(buf);
-			return -1;
-		}
-		offset += de->rec_len;
-		if( (de->inode == 0 && de->rec_len >= rec_len) ||
-				(de->rec_len >= DIR_REC_LEN(de->name_len) + rec_len) ){
-			if( de->inode ){
-				de1 = (DirEntry *) ((char *)de + DIR_REC_LEN(de->name_len));
-				de1->rec_len = de->rec_len - DIR_REC_LEN(de->name_len);
-				de->rec_len = DIR_REC_LEN(de->name_len);
-				de = de1;
-			}	
-			de->inode = inr;
-			de->name_len = namelen;
-			memcpy(de->name, name, namelen);
-			dirtybuf(buf);
-			putbuf(buf);
-			inode->i_mtime = inode->i_ctime = time(0);
-			dirtybuf(ibuf);
-			putbuf(ibuf);
-			return 0;
-		}
-		de = (DirEntry *)((char *)de + de->rec_len);
-	}
-	/* not reached */
-}
-int
-unlink( Xfile *file )
-{
-	Xfs *xf = file->xf;	
-	Inode *dir;
-	int bg, b;
-	Inode *inode;
-	Iobuf *buf, *ibuf;
-	Ext2 ed, es, eb;
-
-	if( S_ISDIR(getmode(file)) && !empty_dir(file) ){
-			chat("non empty directory...");
-			errno = Eperm;
-			return -1;
-	}
-
-	es = getext2(xf, EXT2_SUPER, 0);
-
-	/* get dir inode */
-	if( file->pinbr >= es.u.sb->s_inodes_count ){
-    		chat("inode number %d is too big...",  file->pinbr);
-		putext2(es);
-		errno = Eintern;
-    		return -1;
-	}
-	bg = (file->pinbr - 1) / xf->inodes_per_group;
-	if( bg >= xf->ngroups ){
-		chat("block group (%d) > groups count...", bg);
-		putext2(es);
-		errno = Eintern;
-		return -1;
-	}
-	ed = getext2(xf, EXT2_DESC, bg);
-	b = ed.u.gd->bg_inode_table +
-			(((file->pinbr-1) % xf->inodes_per_group) / 
-			xf->inodes_per_block);
-	putext2(ed);
-	buf = getbuf(xf, b);
-	if( !buf ){	
-		putext2(es);	
-		return -1;
-	}
-	dir = ((struct Inode *) buf->iobuf) + 
-		((file->pinbr-1) % xf->inodes_per_block);
-
-	/* Clean dir entry */
-	
-	if( delete_entry(xf, dir, file->inbr) < 0 ){
-		putbuf(buf);
-		putext2(es);
-		return -1;
-	}
-	if( S_ISDIR(getmode(file)) ){
-		dir->i_links_count--;
-		dirtybuf(buf);
-	}
-	putbuf(buf);
-	
-	/* clean blocks */
-	ibuf = getbuf(xf, file->bufaddr);
-	if( !ibuf ){
-		putext2(es);
-		return -1;
-	}
-	inode = ((Inode *)ibuf->iobuf) + file->bufoffset;
-
-	if( !S_ISLNK(getmode(file)) || 
-		(S_ISLNK(getmode(file)) && (inode->i_size > EXT2_N_BLOCKS<<2)) )
-		if( free_block_inode(file) < 0 ){
-			chat("error while freeing blocks...");
-			putext2(es);
-			putbuf(ibuf);
-			return -1;
-		}
-	
-
-	/* clean inode */	
-	
-	bg = (file->inbr -1) / xf->inodes_per_group;
-	b = (file->inbr -1) % xf->inodes_per_group;
-
-	eb = getext2(xf, EXT2_BINODE, bg);
-	clear_bit(b, eb.u.bmp);
-	dirtyext2(eb);
-	putext2(eb);
-
-	inode->i_dtime = time(0);
-	inode->i_links_count--;
-	if( S_ISDIR(getmode(file)) )
-		inode->i_links_count = 0;
-
-	es.u.sb->s_free_inodes_count++;
-	dirtyext2(es);
-	putext2(es);
-
-	ed = getext2(xf, EXT2_DESC, bg);
-	ed.u.gd->bg_free_inodes_count++;
-	if( S_ISDIR(getmode(file)) )
-		ed.u.gd->bg_used_dirs_count--;
-	dirtyext2(ed);
-	putext2(ed);
-
-	dirtybuf(ibuf);
-	putbuf(ibuf);
-
-	return 1;
-}
-int
-empty_dir(Xfile *dir)
-{
-	Xfs *xf = dir->xf;
-	int nblock;
-	uint offset, i,count;
-	DirEntry *de;
-	Inode *inode;
-	Iobuf *buf, *ibuf;
-	
-	if( !S_ISDIR(getmode(dir)) )
-		return 0;
-
-	ibuf = getbuf(xf, dir->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + dir->bufoffset;
-	nblock = (inode->i_blocks * 512) / xf->block_size;
-
-	for(i=0, count=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){
-		buf = getbuf(xf, inode->i_block[i]);
-		if( !buf ){
-			putbuf(ibuf);
-			return 0;
-		}
-		for(offset=0 ; offset < xf->block_size ;  ){
-			de = (DirEntry *)(buf->iobuf + offset);
-			if(de->inode)
-				count++;
-			offset += de->rec_len;
-		}
-		putbuf(buf);
-		if( count > 2 ){
-			putbuf(ibuf);
-			return 0;
-		}
-	}
-	putbuf(ibuf);
-	return 1;
-}
-int 
-free_block_inode(Xfile *file)
-{
-	Xfs *xf = file->xf;
-	int i, j, k;
-	ulong b, *y, *z;
-	uint *x;
-	int naddr;
-	Inode *inode;
-	Iobuf *buf, *buf1, *buf2, *ibuf;
-
-	ibuf = getbuf(xf, file->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + file->bufoffset;
-
-	for(i=0 ; i < EXT2_IND_BLOCK ; i++){
-		x = inode->i_block + i;
-		if( *x == 0 ){ putbuf(ibuf); return 0; }
-		free_block(xf, *x);
-	}
-	naddr = xf->addr_per_block;
-
-	/* indirect blocks */
-	
-	if( (b=inode->i_block[EXT2_IND_BLOCK]) ){
-		buf = getbuf(xf, b);
-		if( !buf ){ putbuf(ibuf); return -1; }
-		for(i=0 ; i < naddr ; i++){
-			x = ((uint *)buf->iobuf) + i;
-			if( *x == 0 ) break;
-			free_block(xf, *x);
-		}
-		free_block(xf, b);
-		putbuf(buf);
-	}
-
-	/* double indirect block */
-
-	if( (b=inode->i_block[EXT2_DIND_BLOCK]) ){
-		buf = getbuf(xf, b);
-		if( !buf ){ putbuf(ibuf); return -1; }
-		for(i=0 ; i < naddr ; i++){
-			x = ((uint *)buf->iobuf) + i;
-			if( *x== 0 ) break;
-			buf1 = getbuf(xf, *x);
-			if( !buf1 ){ putbuf(buf); putbuf(ibuf); return -1; }
-			for(j=0 ; j < naddr ; j++){
-				y = ((ulong *)buf1->iobuf) + j;
-				if( *y == 0 ) break;
-				free_block(xf, *y);
-			}
-			free_block(xf, *x);
-			putbuf(buf1);
-		}
-		free_block(xf, b);
-		putbuf(buf);
-	}
-	
-	/* triple indirect block */
-	
-	if( (b=inode->i_block[EXT2_TIND_BLOCK]) ){
-		buf = getbuf(xf, b);
-		if( !buf ){ putbuf(ibuf); return -1; }
-		for(i=0 ; i < naddr ; i++){
-			x = ((uint *)buf->iobuf) + i;
-			if( *x == 0 ) break;
-			buf1 = getbuf(xf, *x);
-			if( !buf1 ){ putbuf(buf); putbuf(ibuf); return -1; }
-			for(j=0 ; j < naddr ; j++){
-				y = ((ulong *)buf1->iobuf) + j;
-				if( *y == 0 ) break;
-				buf2 = getbuf(xf, *y);
-				if( !buf2 ){ putbuf(buf); putbuf(buf1); putbuf(ibuf); return -1; }
-				for(k=0 ; k < naddr ; k++){
-					z = ((ulong *)buf2->iobuf) + k;
-					if( *z == 0 ) break;
-					free_block(xf, *z);
-				}
-				free_block(xf, *y);
-				putbuf(buf2);
-			}
-			free_block(xf, *x);
-			putbuf(buf1);
-		}
-		free_block(xf, b);
-		putbuf(buf);
-	}
-
-	putbuf(ibuf);
-	return 0;
-}
-void free_block( Xfs *xf, ulong block )
-{
-	ulong bg;
-	Ext2 ed, es, eb;
-
-	es = getext2(xf, EXT2_SUPER, 0);
-
-	bg = (block - es.u.sb->s_first_data_block) / xf->blocks_per_group;
-	block = (block - es.u.sb->s_first_data_block) % xf->blocks_per_group;
-
-	eb = getext2(xf, EXT2_BBLOCK, bg);
-	clear_bit(block, eb.u.bmp);
-	dirtyext2(eb);
-	putext2(eb);
-
-	es.u.sb->s_free_blocks_count++;
-	dirtyext2(es);
-	putext2(es);
-
-	ed = getext2(xf, EXT2_DESC, bg);
-	ed.u.gd->bg_free_blocks_count++;
-	dirtyext2(ed);
-	putext2(ed);
-
-}
-int 
-delete_entry(Xfs *xf, Inode *inode, int inbr)
-{
-	int nblock = (inode->i_blocks * 512) / xf->block_size;
-	uint offset, i;
-	DirEntry *de, *pde;
-	Iobuf *buf;
-	
-	if( !S_ISDIR(inode->i_mode) )
-		return -1;
-
-	for(i=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){
-		buf = getbuf(xf, inode->i_block[i]);
-		if( !buf )
-			return -1;
-		pde = 0;
-		for(offset=0 ; offset < xf->block_size ;  ){
-			de = (DirEntry *)(buf->iobuf + offset);
-			if( de->inode == inbr ){
-				if( pde )
-					pde->rec_len += de->rec_len;
-				de->inode = 0;
-				dirtybuf(buf);
-				putbuf(buf);
-				return 1;
-			}
-			offset += de->rec_len;
-			pde = de;
-		}
-		putbuf(buf);
-
-	}
-	errno = Enonexist;
-	return -1;
-}
-int
-truncfile(Xfile *f)
-{
-	Inode *inode;
-	Iobuf *ibuf;
-	chat("trunc(fid=%d) ...", f->fid);
-	ibuf = getbuf(f->xf, f->bufaddr);
-	if( !ibuf )
-		return -1;
-	inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-	
-	if( free_block_inode(f) < 0 ){
-		chat("error while freeing blocks...");
-		putbuf(ibuf);
-		return -1;
-	}
-	inode->i_atime = inode->i_mtime = time(0);
-	inode->i_blocks = 0;
-	inode->i_size = 0;
-	memset(inode->i_block, 0, EXT2_N_BLOCKS*sizeof(ulong));
-	dirtybuf(ibuf);
-	putbuf(ibuf);
-	chat("trunc ok...");
-	return 0;
-}
-long
-getmode(Xfile *f)
-{
-	Iobuf *ibuf;
-	long mode;
-
-	ibuf = getbuf(f->xf, f->bufaddr);
-	if( !ibuf )
-		return -1;
-	mode = (((Inode *)ibuf->iobuf) + f->bufoffset)->i_mode;
-	putbuf(ibuf);
-	return mode;
-}
-void
-CleanSuper(Xfs *xf)
-{
-	Ext2 es;
-
-	es = getext2(xf, EXT2_SUPER, 0);
-	es.u.sb->s_state = EXT2_VALID_FS;
-	dirtyext2(es);
-	putext2(es);
-}
-int 
-test_bit(int i, void *data)
-{
-	char *pt = (char *)data;
-
-	return pt[i>>3] & (0x01 << (i&7));
-}
-
-int
-set_bit(int i, void *data)
-{
-  	char *pt;
-
-  	if( test_bit(i, data) )
-    		return 1; /* bit already set !!! */
-  
-  	pt = (char *)data;
-  	pt[i>>3] |= (0x01 << (i&7));
-
-  	return 0;
-}
-
-int 
-clear_bit(int i, void *data)
-{
-	char *pt;
-
-  	if( !test_bit(i, data) )
-    		return 1; /* bit already clear !!! */
-  
- 	 pt = (char *)data;
-  	pt[i>>3] &= ~(0x01 << (i&7));
-	
-	return 0;
-}
-void *
-memscan( void *data, int c, int count )
-{
-	char *pt = (char *)data;
-
-	while( count ){
-		if( *pt == c )
-			return (void *)pt;
-		count--;
-		pt++;
-	}
-	return (void *)pt;
-}
-
-int 
-find_first_zero_bit( void *data, int count /* in byte */)
-{
-  char *pt = (char *)data;
-  int n, i;
-  
-  n = 0;
-
-  while( n < count ){
-    for(i=0 ; i < 8 ; i++)
-      if( !(*pt & (0x01 << (i&7))) )
-	return (n<<3) + i;
-    n++; pt++;
-  }
-  return n << 3;
-}
-
-int 
-find_next_zero_bit( void *data, int count /* in byte */, int where)
-{
-  char *pt = (((char *)data) + (where >> 3));
-  int n, i;
-  
-  n = where >> 3;
-  i = where & 7;
-
-  while( n < count ){
-    for(; i < 8 ; i++)
-      if( !(*pt & (0x01 << (i&7))) )
-	return (n<<3) + i;
-    n++; pt++; i=0;
-  }
-  return n << 3;
-}
-int
-ffz( int x )
-{
-	int c = 0;
-	while( x&1 ){
-		c++;
-		x >>= 1;
-	}
-	return c;
-}
--- a/sys/src/cmd/ext2srv/fns.h
+++ /dev/null
@@ -1,70 +1,0 @@
-void	chat(char*, ...);
-Xfile *	clean(Xfile*);
-void	dirdump(void*);
-int	dosfs(Xfs*);
-int	emptydir(Xfile*);
-int	falloc(Xfs*);
-int	fileaddr(Xfile*, int, int);
-int	getfat(Xfs*, int);
-int	getfile(Xfile*);
-Xfs *	getxfs(char*);
-void	panic(char*, ...);
-void	putfat(Xfs*, int, int);
-void	putfile(Xfile*);
-void	refxfs(Xfs*, int);
-long	writefile(Xfile*, void*, vlong, long);
-char *	xerrstr(int);
-Xfile *	xfile(Fid*, int);
-int	xfspurge(void);
-
-int ext2fs(Xfs *);
-int get_inode( Xfile *, uint);
-char *getname(Xfile *, char *);
-int get_file(Xfile *, char *);
-int bmap( Xfile *f, int block );
-int ffz(int);
-long	readdir(Xfile*, void*, vlong, long);
-long	readfile(Xfile*, void*, vlong, long);
-void dostat(Qid, Xfile *, Dir *);
-int new_block( Xfile *, int);
-int test_bit(int, void *);
-int set_bit(int, void *);
-int  clear_bit(int , void *);
-void *memscan(void *, int, int);
-int find_first_zero_bit(void *, int);
-int find_next_zero_bit(void *, int, int);
-int block_getblk(Xfile *, int, int);
-int inode_getblk(Xfile *, int);
-int getblk(Xfile *, int);
-int  new_inode(Xfile *, int);
-int add_entry(Xfile *, char *, int);
-int create_file(Xfile *, char *, int);
-int create_dir(Xfile *, char *, int);
-int unlink(Xfile *);
-int  delete_entry(Xfs *, Inode *, int);
-int  free_block_inode(Xfile *);
-void free_block( Xfs *, ulong);
-void free_inode( Xfs *, int);
-int empty_dir(Xfile *);
-int truncfile(Xfile *);
-int dowstat(Xfile *, Dir *);
-long getmode(Xfile *);
-Ext2 getext2(Xfs *, char, int);
-void CleanSuper(Xfs *);
-
-/* Iobuf operations */
-
-Iobuf *getbuf(Xfs *, long addr);
-void putbuf(Iobuf *);
-void purgebuf(Xfs *);
-void iobuf_init(void);
-int xread(Xfs *, Iobuf *, long);
-void syncbuf(void);
-void xwrite(Iobuf *);
-void dirtybuf(Iobuf *);
-
-void mchat(char *fmt, ...);
-void dumpbuf(void);
-
-void gidfile(char*);
-void uidfile(char*);
--- a/sys/src/cmd/ext2srv/iobuf.c
+++ /dev/null
@@ -1,174 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-#define	NIOBUF		100
-#define	HIOB		(NIOBUF/3)
-
-static Iobuf*	hiob[HIOB];		/* hash buckets */
-static Iobuf	iobuf[NIOBUF];		/* buffer headers */
-static Iobuf*	iohead;
-static Iobuf*	iotail;
-
-Iobuf*
-getbuf(Xfs *dev, long addr)
-{
-	Iobuf *p, *h, **l, **f;
-
-	l = &hiob[addr%HIOB];
-	for(p = *l; p; p = p->hash) {
-		if(p->addr == addr && p->dev == dev) {
-			p->busy++;
-			return p;
-		}
-	}
-	/* Find a non-busy buffer from the tail */
-	for(p = iotail; p && (p->busy > 0); p = p->prev)
-		;
-	if(!p)
-		panic("all buffers busy");
-	if(p->dirty){
-		xwrite(p);
-		p->dirty = 0;
-	}
-
-	if( xread(dev, p, addr) < 0)
-		return 0;
-	/* Delete from hash chain */
-	f = &hiob[p->addr%HIOB];
-	if( *f == p )
-		*f = p->hash;
-	else {
-		for(h = *f; h ; h = h->hash)
-			if( h->hash == p ){
-				h->hash = p->hash;
-				break;
-			}
-	}
-	/* Fill and hash */
-	p->hash = *l;
-	*l = p;
-	p->addr = addr;
-	p->dev = dev;
-	p->busy=1;
-
-	return p;
-}
-void
-putbuf(Iobuf *p)
-{
-	if(p->busy <= 0)
-		panic("putbuf");
-	p->busy--;
-
-	/* Link onto head for lru */
-	if(p == iohead)
-		return;
-	if( p == iotail ){
-		p->prev->next = 0;
-		iotail = p->prev;
-	}else{
-		p->prev->next = p->next;
-		p->next->prev = p->prev;
-	}
-
-	p->prev = 0;
-	p->next = iohead;
-	iohead->prev = p;
-	iohead = p;
-}
-void
-dirtybuf(Iobuf *p)
-{
-	if(p->busy <=0)
-		panic("dirtybuf");
-	p->dirty = 1;
-}
-void
-syncbuf(void)
-{
-	Iobuf *p;
-
-	for(p=&iobuf[0] ; p<&iobuf[NIOBUF]; p++)
-		if( p->dirty ){
-			xwrite(p);
-			p->dirty = 0;
-		}
-}
-void
-purgebuf(Xfs *dev)
-{
-	Iobuf *p;
-
-	for(p=&iobuf[0]; p<&iobuf[NIOBUF]; p++)
-		if(p->dev == dev)
-			p->busy = 0;
-
-	/* Blow hash chains */
-	memset(hiob, 0, sizeof(hiob));
-}
-void
-iobuf_init(void)
-{
-	Iobuf *p;
-
-	iohead = iobuf;
-	iotail = iobuf+NIOBUF-1;
-
-	for(p = iobuf; p <= iotail; p++) {
-		p->next = p+1;
-		p->prev = p-1;
-		
-		p->iobuf = (char *)malloc(EXT2_MAX_BLOCK_SIZE);
-		if(p->iobuf == 0)
-			panic("iobuf_init");
-	}
-
-	iohead->prev = 0;
-	iotail->next = 0;
-}
-int
-xread(Xfs *dev, Iobuf *p, long addr)
-{
-	/*chat("xread %d,%d...", dev->dev, addr);*/
-
-	seek(dev->dev, (vlong)addr*dev->block_size, 0);
-	if(read(dev->dev, p->iobuf, dev->block_size) != dev->block_size){
-		chat("xread %d, block=%d failed ...", dev->dev, addr);
-		errno = Eio;
-		return -1;
-	}
-	/*chat("xread ok...");*/
-	return 0;
-}
-void 
-xwrite(Iobuf *p)
-{
-	Xfs *dev;
-	long addr;
-
-	dev = p->dev;
-	addr = p->addr;
-	/*chat("xwrite %d,%d...", dev->dev, addr);*/
-
-	seek(dev->dev, (vlong)addr*dev->block_size, 0);
-	if(write(dev->dev, p->iobuf, dev->block_size) != dev->block_size){
-		chat("xwrite %d, block=%d failed ...", dev->dev, addr);
-		errno = Eio;
-		return;
-	}
-	/*chat("xwrite ok...");*/
-}
-void
-dumpbuf(void)
-{
-	Iobuf *p;
-	
-	for(p = iotail; p ; p = p->prev)
-		if( p->busy )
-			mchat("\nHi ERROR buf(%x, %d, %d)\n", p, p->addr, p->busy);	
-}
--- a/sys/src/cmd/ext2srv/mkfile
+++ /dev/null
@@ -1,18 +1,0 @@
-</$objtype/mkfile
-
-TARG=ext2srv
-OFILES=\
-	xfssrv.$O\
-	xfile.$O\
-	ext2fs.$O\
-	ext2subs.$O\
-	chat.$O\
-	iobuf.$O\
-
-HFILES=dat.h\
-	fns.h\
-
-BIN=/$objtype/bin
-</sys/src/cmd/mkone
-
-xfssrv.$O:	errstr.h
--- a/sys/src/cmd/ext2srv/readme
+++ /dev/null
@@ -1,53 +1,0 @@
-Ext2srv Version 0.2
-----------------
-
-Ext2srv is a file server that interprets EXT2 file systems. Ext2srv is identical
-to dossrv in specification. 
-
-I added just one option. By default ext2srv search for the first ext2 partition
-on the device (typically a disk) given by the mount spec option (see bind(1)).
-So, if you have different ext2 partitions on the same disk you can select one 
-of them by adding the partition number at the end of the device in the mount
-system call. For example 
-
-	mount -c /srv/ext2 /n/linux /dev/hd1disk:3
-
-forces the server to look for ext2 filesystem on the third partition of your second
-hard drive.
-
-
-WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
-
-	Ext2srv uses some cache. So you must unmount the directory where you 
-	mount your ext2 partition. It's the only way to synchronise dirty buffers
-	with the disk. 
-
-	Don't reboot your terminal (^t^t r) without explicitly unmount.
-
-	Using something like this script is recommended :
-
-		#!/bin/rc
-	
-		unmount /n/linux >[2] /dev/null
-		unmount /n/linux2 >[2] /dev/null
-		disk/kfscmd halt
-
-WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING 
-
-
-I provide this software `as is' and without any warranty. Feed back are welcome !!!
-
[email protected]
-
-changes 5/17/2000 - threw away partition table
-walking, fixed name_len (it's a uchar not a ushort).
-
--rsc
-
-changes for 4th edition 13 May 2002 - [email protected]
- - adapted for 9P2000
- - added [-p passwd] [-g group] args as in tapefs(4)
- - create makes files with user and group of parent directory (not 100/200)
- - prevent writing to non-regular files
- - correct calculation of group descriptor block location when bsize!=1024
-
--- a/sys/src/cmd/ext2srv/version
+++ /dev/null
@@ -1,36 +1,0 @@
-# ext2srv
-# [bl]
-
-on trouve le numero de version sur les 2 premières lignes du
-fichier ext2subs.c.
-
-
-Version 0.1 : 
-
-1)	il n'ya plus de copie d'inode
-	tous les iobuf utilisés dans un fonction 
-	sont libérés. Un getbuf() => Un putbuf.
-
-2)	Tous dans les iobufs : super, group desc et bitmaps
-
-3)	Il n'ya plus aucune reférence au contenu d'une inode dans la
-	structure Xfile.
-
-4)	Choix de la parition en passant /dev/hd?disk:n lors du mount
-
-
-Version 0.11 :
-
-1)	-v affiche les blocks manipulés en lecture et écriture [18/10/96]
-
-2)	bug pour open avec TRUNC sur les liens... fixed [19/10/96]
-
-3)	maintenant on jette si la taille des blocks != 1024 dans ext2fs() [21/10/96]
-	(c'est quand même mieux pour le moment ...)
-
-
-Version 0.20 :
-
-1)	les blocks de 1024, 2048, 4096 octets sont supportés. [22/10/96]
-
-2)	le bug sur le qid.vers est détecté mais non corrigé...
\ No newline at end of file
--- a/sys/src/cmd/ext2srv/xfile.c
+++ /dev/null
@@ -1,161 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-
-static Xfs	*xhead;
-static Xfile *freelist;
-static Lock	xlock, freelock;
-
-int	client;
-
-Xfs *
-getxfs(char *name)
-{
-	int fd;
-	Dir *dir;
-	Xfs *xf, *fxf;
-
-	if(name==0 || name[0]==0)
-		name = deffile;
-	if(name == 0){
-		errno = Enofilsys;
-		return 0;
-	}
-	fd = open(name, rdonly ? OREAD : ORDWR);
-	if(fd < 0){
-		errno = Enonexist;
-		return 0;
-	}
-	if((dir = dirfstat(fd)) == 0){
-		errno = Eio;
-		close(fd);
-		return 0;
-	}
-	lock(&xlock);
-	for(fxf=0, xf=xhead; xf; xf=xf->next){
-		if(xf->ref == 0){
-			if(fxf == 0)
-				fxf = xf;
-			continue;
-		}
-		if(xf->qid.path != dir->qid.path || xf->qid.vers != dir->qid.vers)
-			continue;
-		if(strcmp(xf->name, name) != 0 || xf->dev < 0)
-			continue;
-		chat("incref \"%s\", dev=%d...", xf->name, xf->dev);
-		++xf->ref;
-		unlock(&xlock);
-		close(fd);
-		free(dir);
-		return xf;
-	}
-	if(fxf==0){
-		fxf = malloc(sizeof(Xfs));
-		if(fxf==0){
-			unlock(&xlock);
-			close(fd);
-			free(dir);
-			errno = Enomem;
-			return 0;
-		}
-		fxf->next = xhead;
-		xhead = fxf;
-	}
-	chat("alloc \"%s\", dev=%d...", name, fd);
-	fxf->name = strdup(name);
-	fxf->ref = 1;
-	fxf->qid = dir->qid;
-	fxf->dev = fd;
-	fxf->fmt = 0;
-	fxf->ptr = 0;
-	free(dir);
-	if( ext2fs(fxf)<0 ){ 
-		xhead = fxf->next;
-		free(fxf);
-		unlock(&xlock);
-		return 0;
-	}
-	unlock(&xlock);
-	return fxf;
-}
-
-void
-refxfs(Xfs *xf, int delta)
-{
-	lock(&xlock);
-	xf->ref += delta;
-	if(xf->ref == 0){
-		/*mchat("free \"%s\", dev=%d...", xf->name, xf->dev);
-		dumpbuf();*/
-		CleanSuper(xf);
-		syncbuf();
-		free(xf->name);
-		purgebuf(xf);
-		if(xf->dev >= 0){
-			close(xf->dev);
-			xf->dev = -1;
-		}
-	}
-	unlock(&xlock);
-}
-
-Xfile *
-xfile(Fid *fid, int flag)
-{
-	Xfile *f;
-
-	f = (Xfile*)fid->aux;
-	switch(flag){
-	default:
-		panic("xfile");
-	case Asis:
-		return (f && f->xf && f->xf->dev < 0) ? 0 : f;
-	case Clean:
-		if (f) chat("Clean and fid->aux already exists\n");
-		break;
-	case Clunk:
-		if(f){
-			clean(f);
-			lock(&freelock);
-			f->next = freelist;
-			freelist = f;
-			unlock(&freelock);
-			fid->aux = 0;
-		}
-		return 0;
-	}
-	if(f)
-		return clean(f);
-	lock(&freelock);
-	if(f = freelist){	/* assign = */
-		freelist = f->next;
-		unlock(&freelock);
-	} else {
-		unlock(&freelock);
-		f = malloc(sizeof(Xfile));
-	}
-	fid->aux = f;
-	f->fid = fid->fid;
-	f->client = client;
-	f->xf = 0;
-	f->ptr = 0;
-	f->root = 0;
-	return f;
-}
-Xfile *
-clean(Xfile *f)
-{
-	if(f->xf && f->root){
-		refxfs(f->xf, -1);
-		f->xf = 0;
-	}
-	f->xf = 0;
-	f->root = 0;
-	f->dirindex = 0;
-	return f;
-}
--- a/sys/src/cmd/ext2srv/xfssrv.c
+++ /dev/null
@@ -1,91 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-#include "errstr.h"
-
-int	errno;
-int rdonly;
-char	*srvfile;
-char	*deffile;
-
-extern void iobuf_init(void);
-extern Srv ext2srv;
-
-void
-usage(void)
-{
-	fprint(2, "usage: %s [-v] [-s] [-r] [-p passwd] [-g group] [-f devicefile] [srvname]\n", argv0);
-	exits("usage");
-}
-
-/*void handler(void *v, char *sig)
-{
-	USED(v,sig);
-	syncbuf();
-	noted(NDFLT);
-}*/
-
-void
-main(int argc, char **argv)
-{
-	int stdio;
-
-	stdio = 0;
-	ARGBEGIN{
-	case 'D':
-		++chatty9p;
-		break;
-	case 'v':
-		++chatty;
-		break;
-	case 'f':
-		deffile = ARGF();
-		break;
-	case 'g':
-		gidfile(ARGF());
-		break;
-	case 'p':
-		uidfile(ARGF());
-		break;
-	case 's':
-		stdio = 1;
-		break;
-	case 'r':
-		rdonly = 1;
-		break;
-	default:
-		usage();
-	}ARGEND
-
-	if(argc == 0)
-		srvfile = "ext2";
-	else if(argc == 1)
-		srvfile = argv[0];
-	else
-		usage();
-	
-	iobuf_init();
-	/*notify(handler);*/
-
-	if(stdio){
-		srv(&ext2srv);
-	}else{
-		chat("%s %d: serving %s\n", argv0, getpid(), srvfile);
-		postmountsrv(&ext2srv, srvfile, 0, 0);
-	}
-	exits(0);
-}
-
-char *
-xerrstr(int e)
-{
-	if (e < 0 || e >= sizeof errmsg/sizeof errmsg[0])
-		return "no such error";
-	else
-		return errmsg[e];
-}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/LICENSE
@@ -1,0 +1,36 @@
+Files include/tree.h and include/queue.h include their own respective
+(BSD 4-clause) license, as well as ext4_hash.c (BSD 2-clause).
+
+The rest of the files are licensed under BSD 3-clause:
+
+Copyright (c) 2013-2017 Grzegorz Kostka ([email protected])
+Copyright (c) 2015-2017 Kaho Ng ([email protected])
+Copyright (c) 2020-2024 Sigrid Solveig Haflínudóttir ([email protected])
+
+HelenOS: Copyright (c) 2012 Martin Sucha
+         Copyright (c) 2012 Frantisek Princ
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+- Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+- The name of the author may not be used to endorse or promote products
+  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- /dev/null
+++ b/sys/src/cmd/ext4srv/NOTES
@@ -1,0 +1,5 @@
+Based on https://github.com/gkostka/lwext4
+
+None of the sources are GPL-licensed:
+ * xattr handling removed altogether
+ * extents logic replaced with a BSD-3 licensed one found in https://github.com/ngkaho1234/lwext
--- /dev/null
+++ b/sys/src/cmd/ext4srv/common.h
@@ -1,0 +1,50 @@
+typedef struct Opts Opts;
+typedef struct Part Part;
+
+#pragma varargck type "Ð" Part*
+#pragma varargck type "M" Part*
+
+struct Opts {
+	char *group;
+	int cachewb;
+	int linkmode;
+	int asroot;
+	int rdonly;
+
+	int fstype;
+	int blksz;
+	int inodesz;
+	u32int ninode;
+	char *label;
+};
+
+struct Part {
+	Ref;
+	QLock;
+	Part *prev, *next;
+
+	char dev[32];
+	char mnt[32];
+	char *partdev;
+
+	struct ext4_blockdev bdev;
+	struct ext4_blockdev_iface bdif;
+	struct ext4_sblock *sb;
+	struct ext4_lock oslocks;
+	Qid qid;
+	Qid qidmask;
+	Groups groups;
+	int f;
+	uchar blkbuf[];
+};
+
+enum {
+	Lhide,
+	Lresolve = 1,
+};
+
+Part *openpart(char *dev, Opts *opts);
+void closepart(Part *p);
+void closeallparts(void);
+void statallparts(void);
+void syncallparts(void);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4.c
@@ -1,0 +1,2961 @@
+#include "ext4_config.h"
+#include "ext4.h"
+#include "ext4_trans.h"
+#include "ext4_fs.h"
+#include "ext4_dir.h"
+#include "ext4_inode.h"
+#include "ext4_super.h"
+#include "ext4_block_group.h"
+#include "ext4_dir_idx.h"
+#include "ext4_journal.h"
+
+char Eexists[] = "file exists";
+char Einval[] = "invalid operation";
+char Eio[] = "i/o error";
+char Enomem[] = "no memory";
+char Enospc[] = "no space";
+char Enotfound[] = "file not found";
+char Eperm[] = "permission denied";
+char Erdonlyfs[] = "read-only fs";
+
+/**@brief   Mount point OS dependent lock*/
+#define EXT4_MP_LOCK(_m)                                               \
+	do {                                                               \
+		if ((_m)->os_locks)                                            \
+			(_m)->os_locks->lock((_m)->os_locks->p_user);              \
+	} while (0)
+
+/**@brief   Mount point OS dependent unlock*/
+#define EXT4_MP_UNLOCK(_m)                                             \
+	do {                                                               \
+		if ((_m)->os_locks)                                            \
+			(_m)->os_locks->unlock((_m)->os_locks->p_user);            \
+	} while (0)
+
+/**@brief   Mount point descriptor.*/
+struct ext4_mountpoint {
+
+	/**@brief   Mount done flag.*/
+	bool mounted;
+
+	/**@brief   Mount point name (@ref ext4_mount)*/
+	char name[CONFIG_EXT4_MAX_MP_NAME + 1];
+
+	/**@brief   OS dependent lock/unlock functions.*/
+	const struct ext4_lock *os_locks;
+
+	/**@brief   Ext4 filesystem internals.*/
+	struct ext4_fs fs;
+
+	/**@brief   JBD fs.*/
+	struct jbd_fs jbd_fs;
+
+	/**@brief   Journal.*/
+	struct jbd_journal jbd_journal;
+
+	/**@brief   Block cache.*/
+	struct ext4_bcache bc;
+};
+
+/**@brief   Block devices descriptor.*/
+struct ext4_block_devices {
+
+	/**@brief   Block device name.*/
+	char name[CONFIG_EXT4_MAX_BLOCKDEV_NAME + 1];
+
+	/**@brief   Block device handle.*/
+	struct ext4_blockdev *bd;
+};
+
+/**@brief   Block devices.*/
+static struct ext4_block_devices s_bdevices[CONFIG_EXT4_BLOCKDEVS_COUNT];
+
+/**@brief   Mountpoints.*/
+static struct ext4_mountpoint s_mp[CONFIG_EXT4_MOUNTPOINTS_COUNT];
+
+int ext4_device_register(struct ext4_blockdev *bd,
+			 const char *dev_name)
+{
+	assert(bd && dev_name);
+
+	if (strlen(dev_name) > CONFIG_EXT4_MAX_BLOCKDEV_NAME) {
+		werrstr("dev name too long: %s", dev_name);
+		return -1;
+	}
+
+	for (usize i = 0; i < CONFIG_EXT4_BLOCKDEVS_COUNT; ++i) {
+		if (!strcmp(s_bdevices[i].name, dev_name)) {
+			werrstr("dev already exists: %s", dev_name);
+			return -1;
+		}
+	}
+
+	for (usize i = 0; i < CONFIG_EXT4_BLOCKDEVS_COUNT; ++i) {
+		if (!s_bdevices[i].bd) {
+			strcpy(s_bdevices[i].name, dev_name);
+			s_bdevices[i].bd = bd;
+			return 0;
+		}
+	}
+
+	werrstr("dev limit reached");
+	return -1;
+}
+
+int ext4_device_unregister(const char *dev_name)
+{
+	assert(dev_name);
+
+	for (usize i = 0; i < CONFIG_EXT4_BLOCKDEVS_COUNT; ++i) {
+		if (strcmp(s_bdevices[i].name, dev_name) != 0)
+			continue;	/* some other slot */
+		memset(&s_bdevices[i], 0, sizeof(s_bdevices[i]));	/* clears name and bd */
+		return 0;
+	}
+
+	werrstr("dev not found: %s", dev_name);
+	return -1;
+}
+
+int ext4_device_unregister_all(void)
+{
+	memset(s_bdevices, 0, sizeof(s_bdevices));
+
+	return 0;
+}
+
+static bool ext4_is_dots(const u8int *name, usize name_size)
+{
+	if ((name_size == 1) && (name[0] == '.'))
+		return true;
+
+	if ((name_size == 2) && (name[0] == '.') && (name[1] == '.'))
+		return true;
+
+	return false;
+}
+
+static int ext4_has_children(bool *has_children, struct ext4_inode_ref *enode)
+{
+	struct ext4_sblock *sb = &enode->fs->sb;
+
+	/* Check if node is directory */
+	if (!ext4_inode_is_type(sb, enode->inode, EXT4_INODE_MODE_DIRECTORY)) {
+		*has_children = false;
+		return 0;
+	}
+
+	struct ext4_dir_iter it;
+	int rc = ext4_dir_iterator_init(&it, enode, 0);
+	if (rc != 0)
+		return rc;
+
+	/* Find a non-empty directory entry */
+	bool found = false;
+	while (it.curr != nil) {
+		if (ext4_dir_en_get_inode(it.curr) != 0) {
+			u16int nsize;
+			nsize = ext4_dir_en_get_name_len(sb, it.curr);
+			if (!ext4_is_dots(it.curr->name, nsize)) {
+				found = true;
+				break;
+			}
+		}
+
+		rc = ext4_dir_iterator_next(&it);
+		if (rc != 0) {
+			ext4_dir_iterator_fini(&it);
+			return rc;
+		}
+	}
+
+	rc = ext4_dir_iterator_fini(&it);
+	if (rc != 0)
+		return rc;
+
+	*has_children = found;
+
+	return 0;
+}
+
+/* Add ch to parent as n[0..len); with rename set, new-directory setup and ch's link bump are skipped. */
+static int ext4_link(struct ext4_mountpoint *mp, struct ext4_inode_ref *parent,
+		     struct ext4_inode_ref *ch, const char *n,
+		     u32int len, bool rename)
+{
+	/* Check maximum name length */
+	if (len > EXT4_DIRECTORY_FILENAME_LEN) {
+		werrstr("entry name too long: %s", n);
+		return -1;
+	}
+
+	/* Add entry to parent directory */
+	int r = ext4_dir_add_entry(parent, n, len, ch);
+	if (r != 0)
+		return r;
+
+	/* Fill new dir -> add '.' and '..' entries.
+	 * Also newly allocated inode should have 0 link count.
+	 */
+	bool is_dir = ext4_inode_is_type(&mp->fs.sb, ch->inode,
+			       EXT4_INODE_MODE_DIRECTORY);
+	if (is_dir && !rename) {
+		/* Initialize directory index if supported */
+		if (ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_DIR_INDEX)) {
+			r = ext4_dir_dx_init(ch, parent);
+			if (r != 0)
+				return r;
+
+			ext4_inode_set_flag(ch->inode, EXT4_INODE_FLAG_INDEX);
+			ch->dirty = true;
+		} else {	/* no index was initialised: add both '.' and '..' by hand */
+			r = ext4_dir_add_entry(ch, ".", strlen("."), ch);
+			if (r != 0) {
+				ext4_dir_remove_entry(parent, n, len);	/* same len as the add above */
+				return r;
+			}
+
+			r = ext4_dir_add_entry(ch, "..", strlen(".."), parent);
+			if (r != 0) {
+				ext4_dir_remove_entry(parent, n, len);	/* same len as the add above */
+				ext4_dir_remove_entry(ch, ".", strlen("."));
+				return r;
+			}
+		}
+
+		/*New empty directory. Two links (. and ..) */
+		ext4_inode_set_links_cnt(ch->inode, 2);
+		ext4_fs_inode_links_count_inc(parent);
+		ch->dirty = true;
+		parent->dirty = true;
+		return r;
+	}
+	/*
+	 * In case we want to rename a directory,
+	 * we reset the original '..' pointer.
+	 */
+	if (is_dir) {
+		bool idx;
+		idx = ext4_inode_has_flag(ch->inode, EXT4_INODE_FLAG_INDEX);
+		struct ext4_dir_search_result res;
+		if (!idx) {
+			r = ext4_dir_find_entry(&res, ch, "..", strlen(".."));
+			if (r != 0) {
+				werrstr(Eio);
+				return -1;
+			}
+
+			ext4_dir_en_set_inode(res.dentry, parent->index);
+			ext4_trans_set_block_dirty(res.block.buf);
+			r = ext4_dir_destroy_result(ch, &res);
+			if (r != 0)
+				return r;
+
+		} else {
+			r = ext4_dir_dx_reset_parent_inode(ch, parent->index);
+			if (r != 0)
+				return r;
+		}
+
+		ext4_fs_inode_links_count_inc(parent);
+		parent->dirty = true;
+	}
+	if (!rename) {
+		ext4_fs_inode_links_count_inc(ch);
+		ch->dirty = true;
+	}
+
+	return r;
+}
+
+static int ext4_unlink(struct ext4_mountpoint *mp,
+		       struct ext4_inode_ref *parent,
+		       struct ext4_inode_ref *child, const char *name,
+		       u32int name_len)
+{
+	bool has_children;
+	int rc = ext4_has_children(&has_children, child);
+	if (rc != 0)
+		return rc;
+
+	/* Cannot unlink non-empty node */
+	if (has_children) {
+		werrstr("remove -- directory not empty");
+		return -1;
+	}
+
+	/* Remove entry from parent directory */
+	rc = ext4_dir_remove_entry(parent, name, name_len);
+	if (rc != 0)
+		return rc;
+
+	bool is_dir = ext4_inode_is_type(&mp->fs.sb, child->inode,
+					 EXT4_INODE_MODE_DIRECTORY);
+
+	/* If directory - handle links from parent */
+	if (is_dir) {
+		ext4_fs_inode_links_count_dec(parent);
+		parent->dirty = true;
+	}
+
+	/*
+	 * TODO: Update timestamps of the parent
+	 * (when we have wall-clock time).
+	 *
+	 * ext4_inode_set_change_inode_time(parent->inode, (u32int) now);
+	 * ext4_inode_set_modification_time(parent->inode, (u32int) now);
+	 * parent->dirty = true;
+	 */
+
+	/*
+	 * TODO: Update timestamp for inode.
+	 *
+	 * ext4_inode_set_change_inode_time(child->inode,
+	 *     (u32int) now);
+	 */
+	if (ext4_inode_get_links_cnt(child->inode)) {
+		ext4_fs_inode_links_count_dec(child);
+		child->dirty = true;
+	}
+
+	return 0;
+}
+
+/* Mount the registered device dev_name at mount_point (which must end in '/'); 0 on success. */
+int ext4_mount(const char *dev_name, const char *mount_point,
+	       bool read_only)
+{
+	int r;
+	u32int bsize;
+	struct ext4_bcache *bc;
+	struct ext4_blockdev *bd = 0;
+	struct ext4_mountpoint *mp = 0;
+
+	assert(mount_point && dev_name);
+	usize mp_len = strlen(mount_point);
+	if (mp_len > CONFIG_EXT4_MAX_MP_NAME) {
+		werrstr("mount point name too long: %s", mount_point);
+		return -1;
+	}
+
+	if (mp_len == 0 || mount_point[mp_len - 1] != '/') {	/* "" has no last char to test */
+		werrstr("invalid mount point: %s", mount_point);
+		return -1;
+	}
+
+	for (usize i = 0; i < CONFIG_EXT4_BLOCKDEVS_COUNT; ++i) {
+		if (!strcmp(dev_name, s_bdevices[i].name)) {
+			bd = s_bdevices[i].bd;
+			break;
+		}
+	}
+
+	if (!bd) {
+		werrstr("dev not found: %s", dev_name);
+		return -1;
+	}
+
+	for (usize i = 0; i < CONFIG_EXT4_MOUNTPOINTS_COUNT; ++i) {
+		if (!s_mp[i].mounted) {
+			strcpy(s_mp[i].name, mount_point);
+			s_mp[i].mounted = 1;
+			mp = &s_mp[i];
+			break;
+		}
+
+		if (!strcmp(s_mp[i].name, mount_point))
+			return 0;	/* already mounted under this name */
+	}
+
+	if (!mp) {
+		werrstr("memory");
+		return -1;
+	}
+
+	r = ext4_block_init(bd);
+	if (r != 0)
+		goto Unmount;
+
+	r = ext4_fs_init(&mp->fs, bd, read_only);
+	if (r != 0)
+		goto Blockfini;
+
+	bsize = ext4_sb_get_block_size(&mp->fs.sb);
+	ext4_block_set_lb_size(bd, bsize);
+	bc = &mp->bc;
+
+	r = ext4_bcache_init_dynamic(bc, CONFIG_BLOCK_DEV_CACHE_SIZE, bsize);
+	if (r != 0)
+		goto Blockfini;
+	if (bsize != bc->itemsize) {
+		werrstr("unsupported block size: %d", bsize);
+		r = -1;
+		goto Cachefini;
+	}
+
+	/*Bind block cache to block device*/
+	r = ext4_block_bind_bcache(bd, bc);
+	if (r != 0) {
+		ext4_bcache_cleanup(bc);
+		goto Cachefini;
+	}
+
+	bd->fs = &mp->fs;
+	return r;
+Cachefini:	/* failure exits: undo in reverse order of setup */
+	ext4_bcache_fini_dynamic(bc);
+Blockfini:
+	ext4_block_fini(bd);
+Unmount:
+	mp->mounted = 0;	/* free the slot so a retry is not taken for an existing mount */
+	return r;
+}
+
+static struct ext4_mountpoint *ext4_get_mount(const char *path)
+{
+	for (usize i = 0; i < CONFIG_EXT4_MOUNTPOINTS_COUNT; ++i) {
+		if (!s_mp[i].mounted)
+			continue;
+		if (!strncmp(s_mp[i].name, path, strlen(s_mp[i].name)))
+			return &s_mp[i];
+	}
+
+	werrstr("mount point not found: %s", path);
+	return nil;
+}
+
+int ext4_umount(const char *mount_point)
+{
+	int r;
+	struct ext4_mountpoint *mp = ext4_get_mount(mount_point);
+
+	if (!mp)
+		return -1;
+
+	r = ext4_fs_fini(&mp->fs);
+	if (r != 0)
+		goto Finish;
+
+	mp->mounted = 0;
+
+	ext4_bcache_cleanup(mp->fs.bdev->bc);
+	ext4_bcache_fini_dynamic(mp->fs.bdev->bc);
+
+	r = ext4_block_fini(mp->fs.bdev);
+Finish:
+	mp->fs.bdev->fs = nil;
+	return r;
+}
+
+int ext4_journal_start(const char *mount_point)
+{
+	int r;
+	struct ext4_mountpoint *mp = ext4_get_mount(mount_point);
+
+	if (!mp)
+		return -1;
+	if (mp->fs.read_only)
+		return 0;
+	if (!ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_HAS_JOURNAL))
+		return 0;
+
+	r = jbd_get_fs(&mp->fs, &mp->jbd_fs);
+	if (r != 0)
+		goto Finish;
+
+	r = jbd_journal_start(&mp->jbd_fs, &mp->jbd_journal);
+	if (r != 0) {
+		mp->jbd_fs.dirty = false;
+		jbd_put_fs(&mp->jbd_fs);
+		goto Finish;
+	}
+	mp->fs.jbd_fs = &mp->jbd_fs;
+	mp->fs.jbd_journal = &mp->jbd_journal;
+
+Finish:
+	return r;
+}
+
+int ext4_journal_stop(const char *mount_point)
+{
+	int r;
+	struct ext4_mountpoint *mp = ext4_get_mount(mount_point);
+
+	if (!mp)
+		return -1;
+	if (mp->fs.read_only)
+		return 0;
+	if (!ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_HAS_JOURNAL))
+		return 0;
+	r = jbd_journal_stop(&mp->jbd_journal);
+	if (r != 0) {
+		mp->jbd_fs.dirty = false;
+		jbd_put_fs(&mp->jbd_fs);
+		mp->fs.jbd_journal = nil;
+		mp->fs.jbd_fs = nil;
+		goto Finish;
+	}
+
+	r = jbd_put_fs(&mp->jbd_fs);
+	if (r != 0) {
+		mp->fs.jbd_journal = nil;
+		mp->fs.jbd_fs = nil;
+		goto Finish;
+	}
+
+	mp->fs.jbd_journal = nil;
+	mp->fs.jbd_fs = nil;
+
+Finish:
+	return r;
+}
+
+/* Run journal recovery on mount_point; returns 0 when there is no journal or recovery succeeded. */
+int ext4_recover(const char *mount_point)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(mount_point);
+	int r = 0;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+	if (!ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_HAS_JOURNAL))
+		goto Finish;	/* nothing to recover; leave through Finish so the lock is dropped */
+
+	struct jbd_fs *jbd_fs = ext4_calloc(1, sizeof(struct jbd_fs));
+	if (!jbd_fs) {
+		werrstr("memory");
+		r = -1;
+		goto Finish;
+	}
+
+	r = jbd_get_fs(&mp->fs, jbd_fs);
+	if (r != 0) {
+		ext4_free(jbd_fs);
+		goto Finish;
+	}
+
+	r = jbd_recover(jbd_fs);
+	jbd_put_fs(jbd_fs);
+	ext4_free(jbd_fs);
+
+	if (r == 0 && !mp->fs.read_only) {
+		u32int bgid;
+		u64int free_blocks_count = 0;
+		u32int free_inodes_count = 0;
+		struct ext4_block_group_ref bg_ref;
+		/* Update superblock's stats */
+		for (bgid = 0;bgid < ext4_block_group_cnt(&mp->fs.sb);bgid++) {
+			r = ext4_fs_get_block_group_ref(&mp->fs, bgid, &bg_ref);
+			if (r != 0)
+				goto Finish;
+
+			free_blocks_count +=
+				ext4_bg_get_free_blocks_count(bg_ref.block_group,
+						&mp->fs.sb);
+			free_inodes_count +=
+				ext4_bg_get_free_inodes_count(bg_ref.block_group,
+						&mp->fs.sb);
+
+			ext4_fs_put_block_group_ref(&bg_ref);
+		}
+		ext4_sb_set_free_blocks_cnt(&mp->fs.sb, free_blocks_count);
+		ext4_set32(&mp->fs.sb, free_inodes_count, free_inodes_count);
+		/* We don't need to save the superblock stats immediately. */
+	}
+
+Finish:
+	EXT4_MP_UNLOCK(mp);
+	return r;
+}
+
+int ext4_trans_start(struct ext4_mountpoint *mp)
+{
+	int r = 0;
+
+	if (mp->fs.jbd_journal && !mp->fs.curr_trans) {
+		struct jbd_journal *journal = mp->fs.jbd_journal;
+		struct jbd_trans *trans;
+		trans = jbd_journal_new_trans(journal);
+		if (!trans) {
+			werrstr("memory");
+			r = -1;
+			goto Finish;
+		}
+		mp->fs.curr_trans = trans;
+	}
+Finish:
+	return r;
+}
+
+int ext4_trans_stop(struct ext4_mountpoint *mp)
+{
+	int r = 0;
+
+	if (mp->fs.jbd_journal && mp->fs.curr_trans) {
+		struct jbd_journal *journal = mp->fs.jbd_journal;
+		struct jbd_trans *trans = mp->fs.curr_trans;
+		r = jbd_journal_commit_trans(journal, trans);
+		mp->fs.curr_trans = nil;
+	}
+	return r;
+}
+
+void ext4_trans_abort(struct ext4_mountpoint *mp)
+{
+	if (mp->fs.jbd_journal && mp->fs.curr_trans) {
+		struct jbd_journal *journal = mp->fs.jbd_journal;
+		struct jbd_trans *trans = mp->fs.curr_trans;
+		jbd_journal_free_trans(journal, trans, true);
+		mp->fs.curr_trans = nil;
+	}
+}
+
+int ext4_mount_point_stats(const char *mount_point,
+			   struct ext4_mount_stats *stats)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(mount_point);
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+	stats->inodes_count = ext4_get32(&mp->fs.sb, inodes_count);
+	stats->free_inodes_count = ext4_get32(&mp->fs.sb, free_inodes_count);
+	stats->blocks_count = ext4_sb_get_blocks_cnt(&mp->fs.sb);
+	stats->free_blocks_count = ext4_sb_get_free_blocks_cnt(&mp->fs.sb);
+	stats->block_size = ext4_sb_get_block_size(&mp->fs.sb);
+
+	stats->block_group_count = ext4_block_group_cnt(&mp->fs.sb);
+	stats->blocks_per_group = ext4_get32(&mp->fs.sb, blocks_per_group);
+	stats->inodes_per_group = ext4_get32(&mp->fs.sb, inodes_per_group);
+
+	memcpy(stats->volume_name, mp->fs.sb.volume_name, 16);
+	EXT4_MP_UNLOCK(mp);
+
+	return 0;
+}
+
+int ext4_mount_setup_locks(const char *mount_point,
+			   const struct ext4_lock *locks)
+{
+	u32int i;
+	struct ext4_mountpoint *mp = nil;
+
+	for (i = 0; i < CONFIG_EXT4_MOUNTPOINTS_COUNT; ++i) {
+		if (!strcmp(s_mp[i].name, mount_point)) {
+			mp = &s_mp[i];
+			break;
+		}
+	}
+	if (!mp) {
+		werrstr("mount point not found: %s", mount_point);
+		return -1;
+	}
+
+	mp->os_locks = locks;
+	return 0;
+}
+
+/********************************FILE OPERATIONS*****************************/
+
+/* Length of path's first component; *is_goal is set true if it ends at NUL, false if at '/'. */
+static int ext4_path_check(const char *path, bool *is_goal)
+{
+	int i;
+
+	for (i = 0; i <= EXT4_DIRECTORY_FILENAME_LEN; ++i) {	/* <=: a max-length name ends at index LEN */
+		if (path[i] == '/') {
+			*is_goal = false;
+			return i;
+		}
+
+		if (path[i] == 0) {
+			*is_goal = true;
+			return i;
+		}
+	}
+
+	return 0;	/* component longer than the limit; *is_goal is left untouched */
+}
+
+static bool ext4_parse_flags(const char *flags, u32int *file_flags)
+{
+	if (!flags)
+		return false;
+
+	if (!strcmp(flags, "r") || !strcmp(flags, "rb")) {
+		*file_flags = O_RDONLY;
+		return true;
+	}
+
+	if (!strcmp(flags, "w") || !strcmp(flags, "wb")) {
+		*file_flags = O_WRONLY | O_CREAT | O_TRUNC;
+		return true;
+	}
+
+	if (!strcmp(flags, "a") || !strcmp(flags, "ab")) {
+		*file_flags = O_WRONLY | O_CREAT | O_APPEND;
+		return true;
+	}
+
+	if (!strcmp(flags, "r+") || !strcmp(flags, "rb+") ||
+	    !strcmp(flags, "r+b")) {
+		*file_flags = O_RDWR;
+		return true;
+	}
+
+	if (!strcmp(flags, "w+") || !strcmp(flags, "wb+") ||
+	    !strcmp(flags, "w+b")) {
+		*file_flags = O_RDWR | O_CREAT | O_TRUNC;
+		return true;
+	}
+
+	if (!strcmp(flags, "a+") || !strcmp(flags, "ab+") ||
+	    !strcmp(flags, "a+b")) {
+		*file_flags = O_RDWR | O_CREAT | O_APPEND;
+		return true;
+	}
+
+	return false;
+}
+
+/* Shrink inode index to new_size in CONFIG_MAX_TRUNCATE_SIZE steps, one transaction per step. */
+static int ext4_trunc_inode(struct ext4_mountpoint *mp,
+			    u32int index, u64int new_size)
+{
+	int r;
+	struct ext4_fs *const fs = &mp->fs;
+	struct ext4_inode_ref inode_ref;
+	u64int inode_size;
+	bool has_trans = mp->fs.jbd_journal && mp->fs.curr_trans;
+	r = ext4_fs_get_inode_ref(fs, index, &inode_ref);
+	if (r != 0)
+		return r;
+
+	inode_size = ext4_inode_get_size(&fs->sb, inode_ref.inode);
+	ext4_fs_put_inode_ref(&inode_ref);
+	if (has_trans)
+		ext4_trans_stop(mp);	/* the caller's transaction is reopened at Finish */
+
+	while (inode_size > new_size + CONFIG_MAX_TRUNCATE_SIZE) {
+		inode_size -= CONFIG_MAX_TRUNCATE_SIZE;
+
+		ext4_trans_start(mp);
+		r = ext4_fs_get_inode_ref(fs, index, &inode_ref);
+		if (r != 0) {
+			ext4_trans_abort(mp);
+			goto Finish;	/* report the error instead of falling into the final step */
+		}
+		r = ext4_fs_truncate_inode(&inode_ref, inode_size);
+		if (r != 0)
+			ext4_fs_put_inode_ref(&inode_ref);
+		else
+			r = ext4_fs_put_inode_ref(&inode_ref);
+
+		if (r != 0) {
+			ext4_trans_abort(mp);
+			goto Finish;
+		} else
+			ext4_trans_stop(mp);
+	}
+
+	if (inode_size > new_size) {
+
+		inode_size = new_size;
+
+		ext4_trans_start(mp);
+		r = ext4_fs_get_inode_ref(fs, index, &inode_ref);
+		if (r != 0) {
+			ext4_trans_abort(mp);
+			goto Finish;
+		}
+		r = ext4_fs_truncate_inode(&inode_ref, inode_size);
+		if (r != 0)
+			ext4_fs_put_inode_ref(&inode_ref);
+		else
+			r = ext4_fs_put_inode_ref(&inode_ref);
+
+		if (r != 0)
+			ext4_trans_abort(mp);
+		else
+			ext4_trans_stop(mp);
+
+	}
+
+Finish:
+
+	if (has_trans)
+		ext4_trans_start(mp);
+
+	return r;
+}
+
+/*
+ * Empty the directory inode `dir`.
+ *
+ * The directory is first cut back to its initial block count (after
+ * re-initializing the directory index when the file system has
+ * EXT4_FCOM_DIR_INDEX), then truncated to zero through `dir` itself.
+ * Fails with "not a directory" if `dir` is not a directory inode.
+ */
+static int ext4_trunc_dir(struct ext4_mountpoint *mp,
+			  struct ext4_inode_ref *parent,
+			  struct ext4_inode_ref *dir)
+{
+	bool dir_inode;
+	u32int bsize;
+	u64int keep;
+	int rc;
+
+	dir_inode = ext4_inode_is_type(&mp->fs.sb, dir->inode,
+				       EXT4_INODE_MODE_DIRECTORY);
+	bsize = ext4_sb_get_block_size(&mp->fs.sb);
+	if (!dir_inode) {
+		werrstr("not a directory");
+		return -1;
+	}
+
+	/* Without a directory index a single block is kept. */
+	keep = bsize;
+	if (ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_DIR_INDEX)) {
+		rc = ext4_dir_dx_init(dir, parent);
+		if (rc != 0)
+			return rc;
+
+		keep = EXT4_DIR_DX_INIT_BCNT * bsize;
+	}
+
+	rc = ext4_trunc_inode(mp, dir->index, keep);
+	if (rc != 0)
+		return rc;
+
+	return ext4_fs_truncate_inode(dir, 0);
+}
+
+/*
+ * Walk `path` from the root of its mount point and fill in `f` for the
+ * final component.
+ *
+ * flags:        O_* open flags. With O_CREAT, missing components are created
+ *               (intermediate ones as directories, the last one with type
+ *               `ftype`); with O_TRUNC an existing regular file is truncated
+ *               to zero; with O_APPEND the position starts at end of file.
+ * ftype:        expected EXT4_DE_* type of the final component.
+ *               NOTICE: if it is equal to EXT4_DE_UNKNOWN, any type of the
+ *               target dir entry will be accepted.
+ * parent_inode: if not nil, receives the inode number of the directory in
+ *               which the last component was looked up.
+ * name_off:     if not nil, receives the offset of the last component
+ *               within `path`.
+ *
+ * f->mp is cleared on entry and only set once the final component has been
+ * reached. Returns 0 on success.
+ */
+static int ext4_generic_open2(ext4_file *f, const char *path, int flags,
+			      int ftype, u32int *parent_inode,
+			      u32int *name_off)
+{
+	bool is_goal = false;
+	u32int imode = EXT4_INODE_MODE_DIRECTORY;
+	u32int next_inode;
+
+	int r;
+	int len;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_dir_search_result result;
+	struct ext4_inode_ref ref;
+
+	f->mp = 0;
+
+	if (!mp)
+		return -1;
+
+	struct ext4_fs *const fs = &mp->fs;
+	struct ext4_sblock *const sb = &mp->fs.sb;
+
+	if (fs->read_only && flags & O_CREAT) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	f->flags = flags;
+
+	/* Skip mount point */
+	path += strlen(mp->name);
+
+	if (name_off)
+		*name_off = strlen(mp->name);
+
+	/* Load root */
+	r = ext4_fs_get_inode_ref(fs, EXT4_INODE_ROOT_INDEX, &ref);
+	if (r != 0)
+		return r;
+
+	if (parent_inode)
+		*parent_inode = ref.index;
+
+	while (1) {
+
+		len = ext4_path_check(path, &is_goal);
+		if (!len) {
+			/* An empty component is only valid when the root
+			 * itself was requested as a directory. */
+			if (ftype == EXT4_DE_DIR || ftype == EXT4_DE_UNKNOWN)
+				if (is_goal)
+					break;
+
+Notfound:
+			werrstr(Enotfound);
+			r = -1;
+			break;
+		}
+
+		r = ext4_dir_find_entry(&result, &ref, path, len);
+		if (r != 0) {
+
+			/* Destroy last result */
+			ext4_dir_destroy_result(&ref, &result);
+			if (r != EXT4_ERR_NOT_FOUND)
+				break;
+
+			if (!(f->flags & O_CREAT))
+				break;
+
+			/* O_CREAT allows creating a new entry */
+			struct ext4_inode_ref child_ref;
+			r = ext4_fs_alloc_inode(fs, &child_ref,
+					is_goal ? ftype : EXT4_DE_DIR);
+
+			if (r != 0)
+				break;
+
+			ext4_fs_inode_blocks_init(fs, &child_ref);
+
+			/* Link with the current directory. */
+			r = ext4_link(mp, &ref, &child_ref, path, len, false);
+			if (r != 0) {
+				/* Fail. Free new inode. */
+				ext4_fs_free_inode(&child_ref);
+				/* We do not want to write new inode.
+				   But block has to be released. */
+				child_ref.dirty = false;
+				ext4_fs_put_inode_ref(&child_ref);
+				break;
+			}
+
+			/* Look the same component up again, now that it
+			 * exists. */
+			ext4_fs_put_inode_ref(&child_ref);
+			continue;
+		}
+
+		if (parent_inode)
+			*parent_inode = ref.index;
+
+		next_inode = ext4_dir_en_get_inode(result.dentry);
+		if (ext4_sb_feature_incom(sb, EXT4_FINCOM_FILETYPE)) {
+			u8int t;
+			t = ext4_dir_en_get_inode_type(sb, result.dentry);
+			imode = ext4_fs_correspond_inode_mode(t);
+		} else {
+			struct ext4_inode_ref child_ref;
+			r = ext4_fs_get_inode_ref(fs, next_inode, &child_ref);
+			if (r != 0) {
+				/* Every other exit releases the search
+				 * result; do not leak it here either. */
+				ext4_dir_destroy_result(&ref, &result);
+				break;
+			}
+
+			imode = ext4_inode_type(sb, child_ref.inode);
+			ext4_fs_put_inode_ref(&child_ref);
+		}
+
+		r = ext4_dir_destroy_result(&ref, &result);
+		if (r != 0)
+			break;
+
+		/* A non-directory in the middle of the path */
+		if (imode != EXT4_INODE_MODE_DIRECTORY && !is_goal)
+			goto Notfound;
+
+		/* The last component has a different type than requested */
+		if (ftype != EXT4_DE_UNKNOWN) {
+			bool df = imode != ext4_fs_correspond_inode_mode(ftype);
+			if (df && is_goal)
+				goto Notfound;
+		}
+
+		/* Descend: swap the reference for the entry just found. */
+		r = ext4_fs_put_inode_ref(&ref);
+		if (r != 0)
+			break;
+
+		r = ext4_fs_get_inode_ref(fs, next_inode, &ref);
+		if (r != 0)
+			break;
+
+		if (is_goal)
+			break;
+
+		path += len + 1;
+
+		if (name_off)
+			*name_off += len + 1;
+	}
+
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&ref);
+		return r;
+	}
+
+	if (is_goal) {
+		if ((f->flags & O_TRUNC) && (imode == EXT4_INODE_MODE_FILE)) {
+			r = ext4_trunc_inode(mp, ref.index, 0);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&ref);
+				return r;
+			}
+		}
+
+		f->mp = mp;
+		f->fsize = ext4_inode_get_size(sb, ref.inode);
+		f->inode = ref.index;
+		f->fpos = 0;
+
+		if (f->flags & O_APPEND)
+			f->fpos = f->fsize;
+	}
+
+	return ext4_fs_put_inode_ref(&ref);
+}
+
+/****************************************************************************/
+
+/*
+ * Front end to ext4_generic_open2() taking the open mode as a string.
+ *
+ * `file_expect` selects whether a regular file or a directory is expected.
+ * When the parsed flags contain O_CREAT the lookup runs inside a
+ * transaction that is stopped on success and aborted on failure.
+ */
+static int ext4_generic_open(ext4_file *f, const char *path, const char *flags,
+			     bool file_expect, u32int *parent_inode,
+			     u32int *name_off)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	u32int oflags;
+	int want;
+	int rc;
+
+	if (ext4_parse_flags(flags, &oflags) == false)
+		return -1;
+
+	want = (file_expect == true) ? EXT4_DE_REG_FILE : EXT4_DE_DIR;
+
+	/* Nothing gets created: no transaction needed. */
+	if (!(oflags & O_CREAT))
+		return ext4_generic_open2(f, path, oflags, want,
+					  parent_inode, name_off);
+
+	ext4_trans_start(mp);
+	rc = ext4_generic_open2(f, path, oflags, want, parent_inode, name_off);
+	if (rc != 0)
+		ext4_trans_abort(mp);
+	else
+		ext4_trans_stop(mp);
+
+	return rc;
+}
+
+/*
+ * Add a directory entry named by the last component of `path` that points
+ * at the inode referenced by `child_ref`.
+ *
+ * Every directory leading to the last component must already exist
+ * (Enotfound otherwise) and the last component must not exist yet (Eexists
+ * otherwise). `rename` is passed through to ext4_link().
+ *
+ * Returns 0 on success.
+ */
+static int ext4_create_hardlink(const char *path,
+		struct ext4_inode_ref *child_ref, bool rename)
+{
+	bool is_goal = false;
+	u32int inode_mode;
+	u32int next_inode;
+
+	int r;
+	int len;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_dir_search_result result;
+	struct ext4_inode_ref ref;
+
+	if (!mp)
+		return -1;
+
+	struct ext4_fs *const fs = &mp->fs;
+	struct ext4_sblock *const sb = &mp->fs.sb;
+
+	/* Skip mount point */
+	path += strlen(mp->name);
+
+	/* Load root */
+	r = ext4_fs_get_inode_ref(fs, EXT4_INODE_ROOT_INDEX, &ref);
+	if (r != 0)
+		return r;
+
+	while (1) {
+
+		len = ext4_path_check(path, &is_goal);
+		if (!len) {
+			/* An empty component cannot name a new link. */
+			werrstr(Enotfound);
+			r = -1;
+			break;
+		}
+
+		r = ext4_dir_find_entry(&result, &ref, path, len);
+		if (r != 0) {
+			/* Destroy last result */
+			ext4_dir_destroy_result(&ref, &result);
+
+			if (r != EXT4_ERR_NOT_FOUND || !is_goal)
+				break;
+
+			/* Last component is free: link it into the
+			 * directory reached so far. */
+			r = ext4_link(mp, &ref, child_ref, path, len, rename);
+			break;
+		}
+
+		if (is_goal) {
+			/* The target name is already taken. */
+			ext4_dir_destroy_result(&ref, &result);
+			werrstr(Eexists);
+			r = -1;
+			break;
+		}
+
+		/*
+		 * From here on is_goal is false: the entry found is an
+		 * intermediate component and has to be a directory.
+		 */
+		next_inode = ext4_dir_en_get_inode(result.dentry);
+		if (ext4_sb_feature_incom(sb, EXT4_FINCOM_FILETYPE)) {
+			u8int t;
+			t = ext4_dir_en_get_inode_type(sb, result.dentry);
+			inode_mode = ext4_fs_correspond_inode_mode(t);
+		} else {
+			struct ext4_inode_ref next_ref;
+			r = ext4_fs_get_inode_ref(fs, next_inode, &next_ref);
+			if (r != 0) {
+				/* Do not leak the search result. */
+				ext4_dir_destroy_result(&ref, &result);
+				break;
+			}
+
+			inode_mode = ext4_inode_type(sb, next_ref.inode);
+			ext4_fs_put_inode_ref(&next_ref);
+		}
+
+		r = ext4_dir_destroy_result(&ref, &result);
+		if (r != 0)
+			break;
+
+		if (inode_mode != EXT4_INODE_MODE_DIRECTORY) {
+			werrstr(Enotfound);
+			r = -1;
+			break;
+		}
+
+		/* Descend into the directory just found. */
+		r = ext4_fs_put_inode_ref(&ref);
+		if (r != 0)
+			break;
+
+		r = ext4_fs_get_inode_ref(fs, next_inode, &ref);
+		if (r != 0)
+			break;
+
+		path += len + 1;
+	}
+
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&ref);
+		return r;
+	}
+
+	return ext4_fs_put_inode_ref(&ref);
+}
+
+/*
+ * Remove the directory entry found at offset `name_off` of `path` from
+ * `parent_ref`. When `child_ref` is a directory, the link count of the
+ * parent is decremented as well and the parent is marked dirty.
+ */
+static int ext4_remove_orig_reference(const char *path, u32int name_off,
+				      struct ext4_inode_ref *parent_ref,
+				      struct ext4_inode_ref *child_ref)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	const char *name;
+	bool last;
+	int namelen;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	/* Name of the entry inside the parent directory */
+	name = path + name_off;
+	namelen = ext4_path_check(name, &last);
+
+	rc = ext4_dir_remove_entry(parent_ref, name, namelen);
+	if (rc == 0 && ext4_inode_is_type(&mp->fs.sb, child_ref->inode,
+					  EXT4_INODE_MODE_DIRECTORY)) {
+		ext4_fs_inode_links_count_dec(parent_ref);
+		parent_ref->dirty = true;
+	}
+
+	return rc;
+}
+
+/*
+ * Create `hardlink_path` as an additional name for the existing file
+ * `path`. Both paths must belong to the same mount point, the file system
+ * must be writable and `path` must not be a directory.
+ */
+int ext4_flink(const char *path, const char *hardlink_path)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_mountpoint *target_mp = ext4_get_mount(hardlink_path);
+	struct ext4_inode_ref child_ref;
+	u32int parent_inode, child_inode;
+	u32int name_off;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	/* Will that happen? */
+	if (mp != target_mp) {
+		werrstr("mount point must be the same: %s vs %s", path, hardlink_path);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	/* Resolve the existing name to an inode number. */
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, &parent_inode, &name_off);
+	if (rc != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return rc;
+	}
+
+	child_inode = f.inode;
+	ext4_fclose(&f);
+	ext4_trans_start(mp);
+
+	/* Load the inode that gets the new name. */
+	rc = ext4_fs_get_inode_ref(&mp->fs, child_inode, &child_ref);
+	if (rc == 0) {
+		if (ext4_inode_is_type(&mp->fs.sb, child_ref.inode, EXT4_INODE_MODE_DIRECTORY)) {
+			/* Creating hardlink for directory is not allowed. */
+			werrstr("is a directory");
+			rc = -1;
+		} else {
+			rc = ext4_create_hardlink(hardlink_path, &child_ref, false);
+		}
+
+		ext4_fs_put_inode_ref(&child_ref);
+	}
+
+	if (rc == 0)
+		ext4_trans_stop(mp);
+	else
+		ext4_trans_abort(mp);
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Rename `path` to `new_path`: link the inode under the new name first,
+ * then remove the original entry from its parent directory. Everything
+ * happens in one transaction, aborted on the first failure.
+ */
+int ext4_frename(const char *path, const char *new_path)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref child_ref, parent_ref;
+	bool have_parent = false, have_child = false;
+	u32int parent_inode, child_inode;
+	u32int name_off;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	/* Resolve the old name: inode, parent directory and name offset. */
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN,
+				&parent_inode, &name_off);
+	if (rc != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return rc;
+	}
+
+	child_inode = f.inode;
+	ext4_fclose(&f);
+	ext4_trans_start(mp);
+
+	/* Load parent */
+	rc = ext4_fs_get_inode_ref(&mp->fs, parent_inode, &parent_ref);
+	have_parent = (rc == 0);
+
+	/* Load the inode being renamed */
+	if (have_parent) {
+		rc = ext4_fs_get_inode_ref(&mp->fs, child_inode, &child_ref);
+		have_child = (rc == 0);
+	}
+
+	if (have_child) {
+		rc = ext4_create_hardlink(new_path, &child_ref, true);
+		if (rc == 0)
+			rc = ext4_remove_orig_reference(path, name_off,
+							&parent_ref, &child_ref);
+	}
+
+	if (have_parent)
+		ext4_fs_put_inode_ref(&parent_ref);
+
+	if (have_child)
+		ext4_fs_put_inode_ref(&child_ref);
+
+	if (rc == 0)
+		ext4_trans_stop(mp);
+	else
+		ext4_trans_abort(mp);
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/****************************************************************************/
+
+/*
+ * Store in `*sb` a pointer to the superblock kept inside the mount point
+ * that `mount_point` resolves to. Returns -1 if there is no such mount.
+ */
+int ext4_get_sblock(const char *mount_point, struct ext4_sblock **sb)
+{
+	struct ext4_mountpoint *mp;
+
+	mp = ext4_get_mount(mount_point);
+	if (mp == nil)
+		return -1;
+
+	*sb = &mp->fs.sb;
+	return 0;
+}
+
+/*
+ * Call ext4_block_cache_write_back() with `on` for the block device of the
+ * mount point that `path` belongs to, under the mount point lock.
+ */
+int ext4_cache_write_back(const char *path, bool on)
+{
+	struct ext4_mountpoint *mp;
+	int rc;
+
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+	rc = ext4_block_cache_write_back(mp->fs.bdev, on);
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Call ext4_block_cache_flush() for the block device of the mount point
+ * that `path` belongs to, under the mount point lock.
+ */
+int ext4_cache_flush(const char *path)
+{
+	struct ext4_mountpoint *mp;
+	int rc;
+
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+	rc = ext4_block_cache_flush(mp->fs.bdev);
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Remove the non-directory entry `path`.
+ *
+ * The entry is unlinked from its parent; when that was the last link, the
+ * contents are truncated first and the inode is freed afterwards.
+ * Directories are refused with "is a directory".
+ *
+ * Returns 0 on success.
+ */
+int ext4_fremove(const char *path)
+{
+	ext4_file f;
+	u32int parent_inode;
+	u32int child_inode;
+	u32int name_off;
+	bool is_goal;
+	int r;
+	int len;
+	struct ext4_inode_ref child;
+	struct ext4_inode_ref parent;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+	r = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, &parent_inode, &name_off);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+
+	child_inode = f.inode;
+	ext4_fclose(&f);
+	ext4_trans_start(mp);
+
+	/* Load parent */
+	r = ext4_fs_get_inode_ref(&mp->fs, parent_inode, &parent);
+	if (r != 0) {
+		ext4_trans_abort(mp);
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+
+	/* We have file to delete. Load it. */
+	r = ext4_fs_get_inode_ref(&mp->fs, child_inode, &child);
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&parent);
+		ext4_trans_abort(mp);
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+
+	/*
+	 * Directories are not removed here. r is still 0 at this point, so
+	 * it has to be set explicitly or the refusal would look like
+	 * success to the caller.
+	 */
+	if (ext4_inode_type(&mp->fs.sb, child.inode) ==
+	    EXT4_INODE_MODE_DIRECTORY) {
+		werrstr("is a directory");
+		r = -1;
+		goto Finish;
+	}
+
+	/* Link count will be zero, the inode should be freed. */
+	if (ext4_inode_get_links_cnt(child.inode) == 1) {
+		ext4_block_cache_write_back(mp->fs.bdev, 1);
+		r = ext4_trunc_inode(mp, child.index, 0);
+		/* Leave write back mode whether or not truncation worked. */
+		ext4_block_cache_write_back(mp->fs.bdev, 0);
+		if (r != 0)
+			goto Finish;
+	}
+
+	/* Set path */
+	path += name_off;
+
+	len = ext4_path_check(path, &is_goal);
+
+	/* Unlink from parent */
+	r = ext4_unlink(mp, &parent, &child, path, len);
+	if (r != 0)
+		goto Finish;
+
+	/* Link count is zero, the inode should be freed. */
+	if (!ext4_inode_get_links_cnt(child.inode)) {
+		ext4_inode_set_del_time(child.inode, -1L);
+
+		r = ext4_fs_free_inode(&child);
+		if (r != 0)
+			goto Finish;
+	}
+
+Finish:
+	ext4_fs_put_inode_ref(&child);
+	ext4_fs_put_inode_ref(&parent);
+
+	if (r != 0)
+		ext4_trans_abort(mp);
+	else
+		ext4_trans_stop(mp);
+
+	EXT4_MP_UNLOCK(mp);
+	return r;
+}
+
+/*
+ * Open the regular file `path` with a string open mode `flags`.
+ * The lookup runs under the mount point lock, with write back mode
+ * switched on for its duration.
+ */
+int ext4_fopen(ext4_file *file, const char *path, const char *flags)
+{
+	struct ext4_mountpoint *mp;
+	int rc;
+
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+	rc = ext4_generic_open(file, path, flags, true, 0, 0);
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Open the regular file `path` with numeric O_* `flags`.
+ * With O_CREAT the lookup is wrapped in a transaction that is stopped on
+ * success and aborted on failure.
+ */
+int ext4_fopen2(ext4_file *file, const char *path, int flags)
+{
+	struct ext4_mountpoint *mp;
+	bool creating;
+	int rc;
+
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+
+	creating = (flags & O_CREAT) != 0;
+
+	EXT4_MP_LOCK(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+
+	if (creating)
+		ext4_trans_start(mp);
+
+	rc = ext4_generic_open2(file, path, flags, EXT4_DE_REG_FILE, nil, nil);
+
+	if (creating) {
+		if (rc != 0)
+			ext4_trans_abort(mp);
+		else
+			ext4_trans_stop(mp);
+	}
+
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Close `file` by resetting all of its fields to zero.
+ * The descriptor must be open (file->mp set). Always returns 0.
+ */
+int ext4_fclose(ext4_file *file)
+{
+	assert(file && file->mp);
+
+	file->fsize = 0;
+	file->fpos = 0;
+	file->inode = 0;
+	file->flags = 0;
+	file->mp = 0;
+
+	return 0;
+}
+
+/*
+ * Shrink the open file `file` to `size` bytes.
+ *
+ * The mount point lock is neither taken nor released here; that is left
+ * entirely to the caller. Truncating to the current size is a no-op;
+ * growing a file is not supported and fails with "space preallocation not
+ * supported". On success file->fsize is updated and file->fpos is clamped
+ * to the new size.
+ */
+static int ext4_ftruncate_no_lock(ext4_file *file, u64int size)
+{
+	struct ext4_inode_ref ref;
+	int r;
+
+	/* No unlock on failure: the lock belongs to the caller, which
+	 * releases it itself after this function returns. */
+	r = ext4_fs_get_inode_ref(&file->mp->fs, file->inode, &ref);
+	if (r != 0)
+		return r;
+
+	/* Sync file size */
+	file->fsize = ext4_inode_get_size(&file->mp->fs.sb, ref.inode);
+	if (file->fsize == size) {
+		/* Already at the requested size: nothing to do. */
+		goto Finish;
+	}
+	if (file->fsize < size) {
+		werrstr("space preallocation not supported");
+		r = -1;
+		goto Finish;
+	}
+
+	/* Start write back cache mode. */
+	r = ext4_block_cache_write_back(file->mp->fs.bdev, 1);
+	if (r != 0)
+		goto Finish;
+
+	r = ext4_trunc_inode(file->mp, ref.index, size);
+	if (r == 0) {
+		file->fsize = size;
+		if (file->fpos > size)
+			file->fpos = size;
+	}
+
+	/* Stop write back cache mode, also when truncation failed. */
+	ext4_block_cache_write_back(file->mp->fs.bdev, 0);
+
+Finish:
+	ext4_fs_put_inode_ref(&ref);
+	return r;
+
+}
+
+/*
+ * Locked, transactional wrapper around ext4_ftruncate_no_lock().
+ * Refuses read-only file systems (Erdonlyfs) and descriptors whose flags
+ * match O_RDONLY (Eperm).
+ */
+int ext4_ftruncate(ext4_file *f, u64int size)
+{
+	struct ext4_mountpoint *mp;
+	int rc;
+
+	assert(f && f->mp);
+	mp = f->mp;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	if (f->flags & O_RDONLY) {
+		werrstr(Eperm);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+	ext4_trans_start(mp);
+
+	rc = ext4_ftruncate_no_lock(f, size);
+	if (rc != 0)
+		ext4_trans_abort(mp);
+	else
+		ext4_trans_stop(mp);
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Read up to `size` bytes from the current position of `file` into `buf`.
+ *
+ * The request is clamped to the end of the file. If `rcnt` is not nil it
+ * receives the number of bytes actually read; it is 0 at end of file.
+ * The read is done in three parts: a leading partial block, runs of whole
+ * blocks, and a trailing partial block. Short symlinks stored inside the
+ * inode itself are copied straight from there.
+ *
+ * Returns 0 on success; fails with Eperm on a descriptor opened O_WRONLY.
+ */
+int ext4_fread(ext4_file *file, void *buf, usize size, usize *rcnt)
+{
+	u32int unalg;
+	u32int iblock_idx;
+	u32int iblock_last;
+	u32int block_size;
+
+	ext4_fsblk_t fblock;
+	ext4_fsblk_t fblock_start;
+	u32int fblock_count;
+
+	u8int *u8_buf = buf;
+	int r;
+	struct ext4_inode_ref ref;
+
+	assert(file && file->mp);
+
+	if (file->flags & O_WRONLY) {
+		werrstr(Eperm);
+		return -1;
+	}
+
+	/* Set the count before the early return so that a zero-sized read
+	 * reports 0 rather than leaving *rcnt untouched. */
+	if (rcnt)
+		*rcnt = 0;
+
+	if (!size)
+		return 0;
+
+	EXT4_MP_LOCK(file->mp);
+
+	struct ext4_fs *const fs = &file->mp->fs;
+	struct ext4_sblock *const sb = &file->mp->fs.sb;
+
+	r = ext4_fs_get_inode_ref(fs, file->inode, &ref);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(file->mp);
+		return r;
+	}
+
+	/* Sync file size */
+	file->fsize = ext4_inode_get_size(sb, ref.inode);
+
+	/* At or past the end of the file there is nothing to read; this
+	 * also keeps fsize - fpos below from wrapping around. */
+	if (file->fpos >= file->fsize) {
+		r = 0;
+		goto Finish;
+	}
+
+	block_size = ext4_sb_get_block_size(sb);
+	if ((u64int)size > (file->fsize - file->fpos))
+		size = (usize)(file->fsize - file->fpos);
+
+	iblock_idx = (u32int)((file->fpos) / block_size);
+	iblock_last = (u32int)((file->fpos + size) / block_size);
+	unalg = (file->fpos) % block_size;
+
+	/* If the size of symlink is smaller than 60 bytes */
+	bool softlink;
+	softlink = ext4_inode_is_type(sb, ref.inode, EXT4_INODE_MODE_SOFTLINK);
+	if (softlink && file->fsize < sizeof(ref.inode->blocks)
+		     && !ext4_inode_get_blocks_count(sb, ref.inode)) {
+
+		/* The link target lives in the inode's block array. */
+		char *content = (char *)ref.inode->blocks;
+		if (file->fpos < file->fsize) {
+			usize len = size;
+			if (unalg + size > (u32int)file->fsize)
+				len = (u32int)file->fsize - unalg;
+			memcpy(buf, content + unalg, len);
+			if (rcnt)
+				*rcnt = len;
+
+		}
+
+		r = 0;
+		goto Finish;
+	}
+
+	/* Leading partial block */
+	if (unalg) {
+		usize len =  size;
+		if (size > (block_size - unalg))
+			len = block_size - unalg;
+
+		r = ext4_fs_get_inode_dblk_idx(&ref, iblock_idx, &fblock, true);
+		if (r != 0)
+			goto Finish;
+
+		/* Do we get an unwritten range? */
+		if (fblock != 0) {
+			u64int off = fblock * block_size + unalg;
+			r = ext4_block_readbytes(file->mp->fs.bdev, off, u8_buf, len);
+			if (r != 0)
+				goto Finish;
+
+		} else {
+			/* Yes, we do. */
+			memset(u8_buf, 0, len);
+		}
+
+		u8_buf += len;
+		size -= len;
+		file->fpos += len;
+
+		if (rcnt)
+			*rcnt += len;
+
+		iblock_idx++;
+	}
+
+	/*
+	 * Whole blocks: collect runs of physically consecutive blocks and
+	 * read each run with one call. The block that ended a run is carried
+	 * over as the start of the next one.
+	 */
+	fblock_start = 0;
+	fblock_count = 0;
+	while (size >= block_size) {
+		while (iblock_idx < iblock_last) {
+			r = ext4_fs_get_inode_dblk_idx(&ref, iblock_idx,
+						       &fblock, true);
+			if (r != 0)
+				goto Finish;
+
+			iblock_idx++;
+
+			if (!fblock_start)
+				fblock_start = fblock;
+
+			if ((fblock_start + fblock_count) != fblock)
+				break;
+
+			fblock_count++;
+		}
+
+		r = ext4_blocks_get_direct(file->mp->fs.bdev, u8_buf, fblock_start,
+					   fblock_count);
+		if (r != 0)
+			goto Finish;
+
+		size -= block_size * fblock_count;
+		u8_buf += block_size * fblock_count;
+		file->fpos += block_size * fblock_count;
+
+		if (rcnt)
+			*rcnt += block_size * fblock_count;
+
+		fblock_start = fblock;
+		fblock_count = 1;
+	}
+
+	/* Trailing partial block */
+	if (size) {
+		u64int off;
+		r = ext4_fs_get_inode_dblk_idx(&ref, iblock_idx, &fblock, true);
+		if (r != 0)
+			goto Finish;
+
+		/* Same unwritten-range handling as for the leading block:
+		 * without a mapped block there is nothing on disk to read. */
+		if (fblock != 0) {
+			off = fblock * block_size;
+			r = ext4_block_readbytes(file->mp->fs.bdev, off, u8_buf, size);
+			if (r != 0)
+				goto Finish;
+		} else {
+			memset(u8_buf, 0, size);
+		}
+
+		file->fpos += size;
+
+		if (rcnt)
+			*rcnt += size;
+	}
+
+Finish:
+	ext4_fs_put_inode_ref(&ref);
+	EXT4_MP_UNLOCK(file->mp);
+	return r;
+}
+
+/*
+ * Write `size` bytes from `buf` at the current position of `file`,
+ * appending blocks to the inode as needed.
+ *
+ * If `wcnt` is not nil it receives the number of bytes written. Failing to
+ * append another block is not reported as an error: the inode size is
+ * updated for what was written and the caller sees a short count. Any
+ * other failure is returned and aborts the transaction.
+ *
+ * Fails with Erdonlyfs on a read-only file system and with Eperm on a
+ * descriptor whose flags match O_RDONLY.
+ */
+int ext4_fwrite(ext4_file *file, const void *buf, usize size, usize *wcnt)
+{
+	u32int unalg;
+	u32int iblk_idx;
+	u32int iblock_last;
+	u32int ifile_blocks;
+	u32int block_size;
+
+	u32int fblock_count;
+	ext4_fsblk_t fblk;
+	ext4_fsblk_t fblock_start;
+
+	struct ext4_inode_ref ref;
+	const u8int *u8_buf = buf;
+	int r, rr = 0;
+	int put_r;
+	bool write_back = false;
+
+	assert(file && file->mp);
+
+	if (file->mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	if (file->flags & O_RDONLY) {
+		werrstr(Eperm);
+		return -1;
+	}
+
+	/* Set the count before the early return so that a zero-sized write
+	 * reports 0 rather than leaving *wcnt untouched. */
+	if (wcnt)
+		*wcnt = 0;
+
+	if (!size)
+		return 0;
+
+	EXT4_MP_LOCK(file->mp);
+	ext4_trans_start(file->mp);
+
+	struct ext4_fs *const fs = &file->mp->fs;
+	struct ext4_sblock *const sb = &file->mp->fs.sb;
+
+	r = ext4_fs_get_inode_ref(fs, file->inode, &ref);
+	if (r != 0) {
+		ext4_trans_abort(file->mp);
+		EXT4_MP_UNLOCK(file->mp);
+		return r;
+	}
+
+	/* Sync file size */
+	file->fsize = ext4_inode_get_size(sb, ref.inode);
+	block_size = ext4_sb_get_block_size(sb);
+
+	iblock_last = (u32int)((file->fpos + size) / block_size);
+	iblk_idx = (u32int)(file->fpos / block_size);
+	ifile_blocks = (u32int)((file->fsize + block_size - 1) / block_size);
+
+	unalg = (file->fpos) % block_size;
+
+	/* Leading partial block */
+	if (unalg) {
+		usize len =  size;
+		u64int off;
+		if (size > (block_size - unalg))
+			len = block_size - unalg;
+
+		r = ext4_fs_init_inode_dblk_idx(&ref, iblk_idx, &fblk);
+		if (r != 0)
+			goto Finish;
+
+		off = fblk * block_size + unalg;
+		r = ext4_block_writebytes(file->mp->fs.bdev, off, u8_buf, len);
+		if (r != 0)
+			goto Finish;
+
+		u8_buf += len;
+		size -= len;
+		file->fpos += len;
+
+		if (wcnt)
+			*wcnt += len;
+
+		iblk_idx++;
+	}
+
+	/* Start write back cache mode. */
+	r = ext4_block_cache_write_back(file->mp->fs.bdev, 1);
+	if (r != 0)
+		goto Finish;
+	write_back = true;
+
+	/*
+	 * Whole blocks: collect runs of physically consecutive blocks and
+	 * write each run with one call. The block that ended a run is
+	 * carried over as the start of the next one.
+	 */
+	fblock_start = 0;
+	fblock_count = 0;
+	while (size >= block_size) {
+
+		while (iblk_idx < iblock_last) {
+			if (iblk_idx < ifile_blocks) {
+				r = ext4_fs_init_inode_dblk_idx(&ref, iblk_idx,
+								&fblk);
+				if (r != 0)
+					goto Finish;
+			} else {
+				rr = ext4_fs_append_inode_dblk(&ref, &fblk,
+							       &iblk_idx);
+				if (rr != 0) {
+					/* Unable to append more blocks. But
+					 * some block might be allocated already
+					 * */
+					break;
+				}
+			}
+
+			iblk_idx++;
+
+			if (!fblock_start) {
+				fblock_start = fblk;
+			}
+
+			if ((fblock_start + fblock_count) != fblk)
+				break;
+
+			fblock_count++;
+		}
+
+		r = ext4_blocks_set_direct(file->mp->fs.bdev, u8_buf, fblock_start,
+					   fblock_count);
+		if (r != 0)
+			break;
+
+		size -= block_size * fblock_count;
+		u8_buf += block_size * fblock_count;
+		file->fpos += block_size * fblock_count;
+
+		if (wcnt)
+			*wcnt += block_size * fblock_count;
+
+		fblock_start = fblk;
+		fblock_count = 1;
+
+		if (rr != 0) {
+			/* Appending a block has failed and no more blocks
+			 * can be written, but the inode size still has to
+			 * be updated for what was written. r is left alone,
+			 * so this ends as a short write, not an error. */
+			goto out_fsize;
+		}
+	}
+
+	/* Stop write back cache mode */
+	ext4_block_cache_write_back(file->mp->fs.bdev, 0);
+	write_back = false;
+
+	if (r != 0)
+		goto Finish;
+
+	/* Trailing partial block */
+	if (size) {
+		u64int off;
+		if (iblk_idx < ifile_blocks) {
+			r = ext4_fs_init_inode_dblk_idx(&ref, iblk_idx, &fblk);
+			if (r != 0)
+				goto Finish;
+		} else {
+			r = ext4_fs_append_inode_dblk(&ref, &fblk, &iblk_idx);
+			if (r != 0) {
+				/* As in the block loop above, running out of
+				 * blocks ends as a short write: only the
+				 * inode size is updated. */
+				r = 0;
+				goto out_fsize;
+			}
+		}
+
+		off = fblk * block_size;
+		r = ext4_block_writebytes(file->mp->fs.bdev, off, u8_buf, size);
+		if (r != 0)
+			goto Finish;
+
+		file->fpos += size;
+
+		if (wcnt)
+			*wcnt += size;
+	}
+
+out_fsize:
+	if (file->fpos > file->fsize) {
+		file->fsize = file->fpos;
+		ext4_inode_set_size(ref.inode, file->fsize);
+		ref.dirty = true;
+	}
+
+Finish:
+	/* Exits taken from inside the block loop get here with write back
+	 * mode still on. */
+	if (write_back)
+		ext4_block_cache_write_back(file->mp->fs.bdev, 0);
+
+	/* Keep the first error; releasing the inode must not hide it. */
+	put_r = ext4_fs_put_inode_ref(&ref);
+	if (r == 0)
+		r = put_r;
+
+	if (r != 0)
+		ext4_trans_abort(file->mp);
+	else
+		ext4_trans_stop(file->mp);
+
+	EXT4_MP_UNLOCK(file->mp);
+	return r;
+}
+
+/*
+ * Move the position of `file`.
+ *
+ * origin 0: set the position to `offset`.
+ * origin 1: move the position by `offset` (either direction).
+ * origin 2: set the position to `offset` bytes before the end of the file.
+ *
+ * The resulting position has to stay within [0, fsize]; anything else,
+ * including an unknown origin, fails with Einval.
+ */
+int ext4_fseek(ext4_file *file, s64int offset, u32int origin)
+{
+	if (origin == 0) {
+		if (offset >= 0 && (u64int)offset <= file->fsize) {
+			file->fpos = offset;
+			return 0;
+		}
+	} else if (origin == 1) {
+		bool before_start = offset < 0 &&
+				    (u64int)(-offset) > file->fpos;
+		bool past_end = offset > 0 &&
+				(u64int)offset > (file->fsize - file->fpos);
+
+		if (!before_start && !past_end) {
+			file->fpos += offset;
+			return 0;
+		}
+	} else if (origin == 2) {
+		if (offset >= 0 && (u64int)offset <= file->fsize) {
+			file->fpos = file->fsize - offset;
+			return 0;
+		}
+	}
+
+	werrstr(Einval);
+	return -1;
+}
+
+/* Current position of `file`, as stored in the descriptor. */
+u64int ext4_ftell(ext4_file *file)
+{
+	u64int pos = file->fpos;
+
+	return pos;
+}
+
+/* Size of `file`, as cached in the descriptor. */
+u64int ext4_fsize(ext4_file *file)
+{
+	u64int len = file->fsize;
+
+	return len;
+}
+
+
+/*
+ * Resolve `path`, start a transaction and load the inode into `inode_ref`.
+ * On success the transaction is left open, to be finished by
+ * ext4_trans_put_inode_ref(). If the path cannot be resolved no transaction
+ * is started; if loading the inode fails the transaction is aborted.
+ */
+static int ext4_trans_get_inode_ref(const char *path,
+				    struct ext4_mountpoint *mp,
+				    struct ext4_inode_ref *inode_ref)
+{
+	ext4_file f;
+	int rc;
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0) {
+		ext4_trans_start(mp);
+
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, inode_ref);
+		if (rc != 0)
+			ext4_trans_abort(mp);
+	}
+
+	return rc;
+}
+
+/*
+ * Release `inode_ref` and finish the transaction on `mp`: stop it when the
+ * release succeeded, abort it otherwise.
+ */
+static int ext4_trans_put_inode_ref(struct ext4_mountpoint *mp,
+				    struct ext4_inode_ref *inode_ref)
+{
+	int rc = ext4_fs_put_inode_ref(inode_ref);
+
+	if (rc == 0)
+		ext4_trans_stop(mp);
+	else
+		ext4_trans_abort(mp);
+
+	return rc;
+}
+
+
+/*
+ * Look `path` up and hand back its inode number in `*ret_ino` and a copy
+ * of the raw inode in `*inode`. Either output pointer may be nil.
+ */
+int ext4_raw_inode_fill(const char *path, u32int *ret_ino,
+			struct ext4_inode *inode)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+
+	/* Load the inode the path resolved to */
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+
+	if (rc == 0) {
+		if (ret_ino)
+			*ret_ino = f.inode;
+		if (inode)
+			memcpy(inode, inode_ref.inode, sizeof(struct ext4_inode));
+
+		ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Check that `path` can be opened as an entry of the EXT4_DE_* type
+ * `type`. Returns the result of the lookup, 0 meaning it succeeded.
+ */
+int ext4_inode_exist(const char *path, int type)
+{
+	struct ext4_mountpoint *mp;
+	ext4_file f;
+	int rc;
+
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+	rc = ext4_generic_open2(&f, path, O_RDONLY, type, nil, nil);
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Replace the low 12 bits of the mode of `path` with those of `mode`;
+ * all higher bits of the stored mode are kept.
+ */
+int ext4_mode_set(const char *path, u32int mode)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	u32int old_mode, new_mode;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_trans_get_inode_ref(path, mp, &inode_ref);
+	if (rc == 0) {
+		old_mode = ext4_inode_get_mode(&mp->fs.sb, inode_ref.inode);
+		new_mode = (old_mode & ~0xFFF) | (mode & 0xFFF);
+		ext4_inode_set_mode(&mp->fs.sb, inode_ref.inode, new_mode);
+
+		inode_ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/* Set the user and group id stored in the inode of `path`. */
+int ext4_owner_set(const char *path, u32int uid, u32int gid)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_trans_get_inode_ref(path, mp, &inode_ref);
+	if (rc == 0) {
+		ext4_inode_set_uid(inode_ref.inode, uid);
+		ext4_inode_set_gid(inode_ref.inode, gid);
+
+		inode_ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/* Read the mode stored in the inode of `path` into `*mode`. */
+int ext4_mode_get(const char *path, u32int *mode)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+
+	if (rc == 0) {
+		*mode = ext4_inode_get_mode(&mp->fs.sb, inode_ref.inode);
+		rc = ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/* Read the user and group id stored in the inode of `path`. */
+int ext4_owner_get(const char *path, u32int *uid, u32int *gid)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+
+	if (rc == 0) {
+		*uid = ext4_inode_get_uid(inode_ref.inode);
+		*gid = ext4_inode_get_gid(inode_ref.inode);
+		rc = ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/* Set the access time stored in the inode of `path`. */
+int ext4_atime_set(const char *path, u32int atime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_trans_get_inode_ref(path, mp, &inode_ref);
+	if (rc == 0) {
+		ext4_inode_set_access_time(inode_ref.inode, atime);
+		inode_ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/* Set the modification time stored in the inode of `path`. */
+int ext4_mtime_set(const char *path, u32int mtime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_trans_get_inode_ref(path, mp, &inode_ref);
+	if (rc == 0) {
+		ext4_inode_set_modif_time(inode_ref.inode, mtime);
+		inode_ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/* Set the inode change time stored in the inode of `path`. */
+int ext4_ctime_set(const char *path, u32int ctime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_trans_get_inode_ref(path, mp, &inode_ref);
+	if (rc == 0) {
+		ext4_inode_set_change_inode_time(inode_ref.inode, ctime);
+		inode_ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/* Read the access time stored in the inode of `path` into `*atime`. */
+int ext4_atime_get(const char *path, u32int *atime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+
+	if (rc == 0) {
+		*atime = ext4_inode_get_access_time(inode_ref.inode);
+		rc = ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/* Read the modification time stored in the inode of `path` into `*mtime`. */
+int ext4_mtime_get(const char *path, u32int *mtime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+
+	if (rc == 0) {
+		*mtime = ext4_inode_get_modif_time(inode_ref.inode);
+		rc = ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/* Read the inode change time stored in the inode of `path` into `*ctime`. */
+int ext4_ctime_get(const char *path, u32int *ctime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+
+	if (rc == 0) {
+		*ctime = ext4_inode_get_change_inode_time(inode_ref.inode);
+		rc = ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Store the `size` bytes at `buf` as the target of the symlink open in `f`.
+ *
+ * A target shorter than the inode's block array is kept inside the inode
+ * itself; a longer one goes into a single data block, so `size` must not
+ * exceed the block size. A zero `size` does nothing and returns 0.
+ * The caller is expected to hold the mount point lock.
+ */
+static int ext4_fsymlink_set(ext4_file *f, const void *buf, u32int size)
+{
+	struct ext4_inode_ref ref;
+	u32int sblock;
+	ext4_fsblk_t fblock;
+	u32int block_size;
+	int r;
+
+	assert(f && f->mp);
+
+	if (!size)
+		return 0;
+
+	r = ext4_fs_get_inode_ref(&f->mp->fs, f->inode, &ref);
+	if (r != 0)
+		return r;
+
+	/* The target has to fit into one block. */
+	block_size = ext4_sb_get_block_size(&f->mp->fs.sb);
+	if (size > block_size) {
+		werrstr("invalid block size");
+		r = -1;
+		goto Finish;
+	}
+
+	/* Drop any previous target. An empty file (such as one that has
+	 * just been created) has nothing to drop, so it is left alone. */
+	if (ext4_inode_get_size(&f->mp->fs.sb, ref.inode) > 0) {
+		r = ext4_ftruncate_no_lock(f, 0);
+		if (r != 0)
+			goto Finish;
+	}
+
+	/* Start write back cache mode. */
+	r = ext4_block_cache_write_back(f->mp->fs.bdev, 1);
+	if (r != 0)
+		goto Finish;
+
+	/* If the size of symlink is smaller than 60 bytes */
+	if (size < sizeof(ref.inode->blocks)) {
+		memset(ref.inode->blocks, 0, sizeof(ref.inode->blocks));
+		memcpy(ref.inode->blocks, buf, size);
+		ext4_inode_clear_flag(ref.inode, EXT4_INODE_FLAG_EXTENTS);
+	} else {
+		ext4_fs_inode_blocks_init(&f->mp->fs, &ref);
+		r = ext4_fs_append_inode_dblk(&ref, &fblock, &sblock);
+		if (r == 0) {
+			u64int off = fblock * block_size;
+			r = ext4_block_writebytes(f->mp->fs.bdev, off, buf, size);
+		}
+	}
+
+	/* Stop write back cache mode, also when writing failed. */
+	ext4_block_cache_write_back(f->mp->fs.bdev, 0);
+
+	if (r != 0)
+		goto Finish;
+
+	ext4_inode_set_size(ref.inode, size);
+	ref.dirty = true;
+
+	f->fsize = size;
+	if (f->fpos > size)
+		f->fpos = size;
+
+Finish:
+	ext4_fs_put_inode_ref(&ref);
+	return r;
+}
+
+/**@brief Create a symbolic link.
+ * @param target text the link points to
+ * @param path   path of the link to create
+ * @return 0 on success, non-zero on failure (-1 when no mount point
+ *         matches the path or the file system is read-only)
+ */
+int ext4_fsymlink(const char *target, const char *path)
+{
+	struct ext4_mountpoint *mnt;
+	ext4_file link;
+	int rc;
+
+	mnt = ext4_get_mount(path);
+	if (mnt == nil)
+		return -1;
+
+	if (mnt->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mnt);
+	ext4_block_cache_write_back(mnt->fs.bdev, 1);
+	ext4_trans_start(mnt);
+
+	/* Create the link file, then fill in its target. */
+	rc = ext4_generic_open2(&link, path, O_RDWR | O_CREAT, EXT4_DE_SYMLINK, nil, nil);
+	if (rc == 0) {
+		rc = ext4_fsymlink_set(&link, target, strlen(target));
+		ext4_fclose(&link);
+	}
+
+	if (rc == 0)
+		ext4_trans_stop(mnt);
+	else
+		ext4_trans_abort(mnt);
+
+	ext4_block_cache_write_back(mnt->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mnt);
+	return rc;
+}
+
+/**@brief Read the target of a symbolic link.
+ * @param path    path of the symbolic link
+ * @param buf     destination buffer (must not be nil)
+ * @param bufsize size of buf, passed to ext4_fread as the read size
+ * @param rcnt    passed to ext4_fread to receive the read count
+ * @return 0 on success, non-zero on failure (-1 when no mount point
+ *         matches the path)
+ */
+int ext4_readlink(const char *path, char *buf, usize bufsize, usize *rcnt)
+{
+	struct ext4_mountpoint *mnt = ext4_get_mount(path);
+	ext4_file link;
+	int rc;
+
+	assert(buf != nil);
+
+	if (mnt == nil)
+		return -1;
+
+	EXT4_MP_LOCK(mnt);
+	ext4_block_cache_write_back(mnt->fs.bdev, 1);
+
+	rc = ext4_generic_open2(&link, path, O_RDONLY, EXT4_DE_SYMLINK, nil, nil);
+	if (rc == 0) {
+		rc = ext4_fread(&link, buf, bufsize, rcnt);
+		ext4_fclose(&link);
+	}
+
+	ext4_block_cache_write_back(mnt->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mnt);
+	return rc;
+}
+
+/**@brief Record a device number in the inode of an opened special file.
+ *
+ * The inode size and the file's cached size and position are reset to 0.
+ *
+ * @param f   open file (f and f->mp must be valid)
+ * @param dev device number to store
+ * @return 0 on success, non-zero on failure
+ */
+static int ext4_mknod_set(ext4_file *f, u32int dev)
+{
+	struct ext4_inode_ref iref;
+	int rc;
+
+	assert(f && f->mp);
+
+	rc = ext4_fs_get_inode_ref(&f->mp->fs, f->inode, &iref);
+	if (rc == 0) {
+		ext4_inode_set_dev(iref.inode, dev);
+		ext4_inode_set_size(iref.inode, 0);
+		iref.dirty = true;
+
+		f->fsize = 0;
+		f->fpos = 0;
+
+		rc = ext4_fs_put_inode_ref(&iref);
+	}
+	return rc;
+}
+
+/**@brief Create a special file (character/block device, FIFO or socket).
+ * @param path     path of the file to create
+ * @param filetype one of EXT4_DE_CHRDEV, EXT4_DE_BLKDEV, EXT4_DE_FIFO,
+ *                 EXT4_DE_SOCK; any other value is rejected with Einval
+ * @param dev      device number, stored only for character and block devices
+ * @return 0 on success, non-zero on failure (-1 when no mount point
+ *         matches, the file system is read-only or filetype is rejected)
+ */
+int ext4_mknod(const char *path, int filetype, u32int dev)
+{
+	struct ext4_mountpoint *mnt;
+	ext4_file node;
+	bool is_device, is_special;
+	int rc;
+
+	mnt = ext4_get_mount(path);
+	if (mnt == nil)
+		return -1;
+
+	if (mnt->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	/*
+	 * Only the four special types are accepted; regular files,
+	 * directories, symlinks, the unknown type and any bogus value
+	 * are refused.
+	 */
+	is_device = filetype == EXT4_DE_CHRDEV || filetype == EXT4_DE_BLKDEV;
+	is_special = is_device ||
+	    filetype == EXT4_DE_FIFO ||
+	    filetype == EXT4_DE_SOCK;
+	if (!is_special) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mnt);
+	ext4_block_cache_write_back(mnt->fs.bdev, 1);
+	ext4_trans_start(mnt);
+
+	rc = ext4_generic_open2(&node, path, O_RDWR | O_CREAT, filetype, nil, nil);
+	if (rc == 0) {
+		/* Only devices carry a device number. */
+		if (is_device)
+			rc = ext4_mknod_set(&node, dev);
+		ext4_fclose(&node);
+	}
+
+	if (rc == 0)
+		ext4_trans_stop(mnt);
+	else
+		ext4_trans_abort(mnt);
+
+	ext4_block_cache_write_back(mnt->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mnt);
+	return rc;
+}
+
+/*********************************DIRECTORY OPERATION************************/
+
+/**@brief Remove the directory at a path together with everything below it.
+ *
+ * The tree is walked iteratively. Entries without children are truncated,
+ * unlinked and freed; an entry that still has children is descended into
+ * first. When the walk is back at the starting directory, that directory is
+ * unlinked from its parent.
+ *
+ * @param path path of the directory to remove
+ * @return 0 on success, non-zero on failure (-1 when no mount point matches
+ *         the path or the file system is read-only)
+ */
+int ext4_dir_rm(const char *path)
+{
+	int r;
+	int len;
+	ext4_file f;
+
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref act;
+	struct ext4_inode_ref child;
+	struct ext4_dir_iter it;
+
+	u32int name_off;
+	u32int inode_up;
+	u32int inode_current;
+	u32int depth = 1;
+
+	bool has_children;
+	bool is_goal;
+	bool dir_end;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	struct ext4_fs *const fs = &mp->fs;
+
+	/*Check if exist.*/
+	r = ext4_generic_open(&f, path, "r", false, &inode_up, &name_off);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+
+	/*From here on path is the last component, used for the final unlink*/
+	path += name_off;
+	len = ext4_path_check(path, &is_goal);
+	inode_current = f.inode;
+
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+
+	do {
+
+		/*Each pass over a directory restarts at its first entry*/
+		u64int act_curr_pos = 0;
+		has_children = false;
+		dir_end = false;
+
+		while (r == 0 && !has_children && !dir_end) {
+
+			/*Load directory node.*/
+			r = ext4_fs_get_inode_ref(fs, inode_current, &act);
+			if (r != 0) {
+				break;
+			}
+
+			/*Initialize iterator.*/
+			r = ext4_dir_iterator_init(&it, &act, act_curr_pos);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&act);
+				break;
+			}
+
+			if (!it.curr) {
+				dir_end = true;
+				goto End;
+			}
+
+			ext4_trans_start(mp);
+
+			/*Get up directory inode when ".." entry*/
+			if ((it.curr->name_len == 2) &&
+			    ext4_is_dots(it.curr->name, it.curr->name_len)) {
+				inode_up = ext4_dir_en_get_inode(it.curr);
+			}
+
+			/*If directory or file entry,  but not "." ".." entry*/
+			if (!ext4_is_dots(it.curr->name, it.curr->name_len)) {
+
+				/*Get child inode reference do unlink
+				 * directory/file.*/
+				u32int cinode;
+				u32int inode_type;
+				cinode = ext4_dir_en_get_inode(it.curr);
+				r = ext4_fs_get_inode_ref(fs, cinode, &child);
+				if (r != 0)
+					goto End;
+
+				/*If directory with no leaf children*/
+				r = ext4_has_children(&has_children, &child);
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+
+				if (has_children) {
+					/*Has directory children. Go into this
+					 * directory.*/
+					inode_up = inode_current;
+					inode_current = cinode;
+					depth++;
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+				inode_type = ext4_inode_type(&mp->fs.sb,
+						child.inode);
+
+				/* Truncate */
+				if (inode_type != EXT4_INODE_MODE_DIRECTORY)
+					r = ext4_trunc_inode(mp, child.index, 0);
+				else
+					r = ext4_trunc_dir(mp, &act, &child);
+
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+
+				/*No children in child directory or file. Just
+				 * unlink.*/
+				r = ext4_unlink(f.mp, &act, &child,
+						(char *)it.curr->name,
+						it.curr->name_len);
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+
+				ext4_inode_set_del_time(child.inode, -1L);
+				ext4_inode_set_links_cnt(child.inode, 0);
+				child.dirty = true;
+
+				r = ext4_fs_free_inode(&child);
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+
+				r = ext4_fs_put_inode_ref(&child);
+				if (r != 0)
+					goto End;
+
+			}
+
+			r = ext4_dir_iterator_next(&it);
+			if (r != 0)
+				goto End;
+
+			act_curr_pos = it.curr_off;
+End:
+			ext4_dir_iterator_fini(&it);
+			if (r == 0)
+				r = ext4_fs_put_inode_ref(&act);
+			else
+				ext4_fs_put_inode_ref(&act);
+
+			if (r != 0)
+				ext4_trans_abort(mp);
+			else
+				ext4_trans_stop(mp);
+		}
+
+		if (dir_end) {
+			/*Directory iterator reached last entry*/
+			depth--;
+			if (depth)
+				inode_current = inode_up;
+
+		}
+
+		if (r != 0)
+			break;
+
+	} while (depth);
+
+	/*Last unlink*/
+	if (r == 0 && !depth) {
+		/*Load parent.*/
+		struct ext4_inode_ref parent;
+		r = ext4_fs_get_inode_ref(&f.mp->fs, inode_up,
+				&parent);
+		if (r != 0)
+			goto Finish;
+		r = ext4_fs_get_inode_ref(&f.mp->fs, inode_current,
+				&act);
+		if (r != 0) {
+			/*act was not obtained; only parent is held here*/
+			ext4_fs_put_inode_ref(&parent);
+			goto Finish;
+		}
+
+		ext4_trans_start(mp);
+
+		/* In this place all directories should be
+		 * unlinked.
+		 * Last unlink from root of current directory*/
+		r = ext4_unlink(f.mp, &parent, &act,
+				(char *)path, len);
+		if (r != 0) {
+			ext4_fs_put_inode_ref(&parent);
+			ext4_fs_put_inode_ref(&act);
+			goto Finish;
+		}
+
+		if (ext4_inode_get_links_cnt(act.inode) == 2) {
+			ext4_inode_set_del_time(act.inode, -1L);
+			ext4_inode_set_links_cnt(act.inode, 0);
+			act.dirty = true;
+			/*Truncate*/
+			r = ext4_trunc_dir(mp, &parent, &act);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&parent);
+				ext4_fs_put_inode_ref(&act);
+				goto Finish;
+			}
+
+			r = ext4_fs_free_inode(&act);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&parent);
+				ext4_fs_put_inode_ref(&act);
+				goto Finish;
+			}
+		}
+
+		r = ext4_fs_put_inode_ref(&parent);
+		if (r != 0) {
+			/*Still release act so it is not left referenced*/
+			ext4_fs_put_inode_ref(&act);
+			goto Finish;
+		}
+
+		r = ext4_fs_put_inode_ref(&act);
+	Finish:
+		if (r != 0)
+			ext4_trans_abort(mp);
+		else
+			ext4_trans_stop(mp);
+	}
+
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+
+	return r;
+}
+
+/**@brief Move or rename a directory; forwards to ext4_frename.
+ * @param path     current path
+ * @param new_path new path
+ * @return value returned by ext4_frename
+ */
+int ext4_dir_mv(const char *path, const char *new_path)
+{
+	int rc;
+
+	rc = ext4_frename(path, new_path);
+	return rc;
+}
+
+/**@brief Create a directory.
+ * @param path path of the directory to create
+ * @return 0 on success, non-zero on failure; -1 when no mount point matches
+ *         the path, the file system is read-only, or the path already
+ *         exists (error string Eexists)
+ */
+int ext4_dir_mk(const char *path)
+{
+	struct ext4_mountpoint *mnt;
+	ext4_file probe;
+	int rc;
+
+	mnt = ext4_get_mount(path);
+	if (mnt == nil)
+		return -1;
+
+	if (mnt->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mnt);
+
+	if (ext4_generic_open(&probe, path, "r", false, nil, nil) == 0) {
+		/* Opening for read succeeded, so the path is taken. */
+		werrstr(Eexists);
+		rc = -1;
+	} else {
+		/* Opening for write creates the new directory. */
+		rc = ext4_generic_open(&probe, path, "w", false, nil, nil);
+	}
+
+	EXT4_MP_UNLOCK(mnt);
+	return rc;
+}
+
+/**@brief Open a directory for iteration.
+ * @param dir  directory handle to initialise; its read offset is reset to 0
+ * @param path path of the directory
+ * @return 0 on success, non-zero on failure (-1 when no mount point
+ *         matches the path)
+ */
+int ext4_dir_open(ext4_dir *dir, const char *path)
+{
+	struct ext4_mountpoint *mnt;
+	int rc;
+
+	mnt = ext4_get_mount(path);
+	if (mnt == nil)
+		return -1;
+
+	EXT4_MP_LOCK(mnt);
+	rc = ext4_generic_open(&dir->f, path, "r", false, nil, nil);
+	dir->next_off = 0;
+	EXT4_MP_UNLOCK(mnt);
+
+	return rc;
+}
+
+/**@brief Close a directory handle by closing its underlying file.
+ * @param dir directory handle
+ * @return value returned by ext4_fclose
+ */
+int ext4_dir_close(ext4_dir *dir)
+{
+	int rc;
+
+	rc = ext4_fclose(&dir->f);
+	return rc;
+}
+
+/**@brief Return the next entry of an open directory.
+ *
+ * The entry is copied into dir->de, so the returned pointer refers to
+ * storage inside dir and is overwritten by the next call.
+ *
+ * @param dir directory handle
+ * @return pointer to dir->de, or 0 when there are no more entries or an
+ *         error occurred
+ */
+const ext4_direntry *ext4_dir_entry_next(ext4_dir *dir)
+{
+#define EXT4_DIR_ENTRY_OFFSET_TERM (u64int)(-1)
+
+	int r;
+	u16int name_length;
+	ext4_direntry *de = 0;
+	struct ext4_inode_ref dir_inode;
+	struct ext4_dir_iter it;
+
+	EXT4_MP_LOCK(dir->f.mp);
+
+	/* A previous call already ran off the end of the directory */
+	if (dir->next_off == EXT4_DIR_ENTRY_OFFSET_TERM) {
+		EXT4_MP_UNLOCK(dir->f.mp);
+		return 0;
+	}
+
+	r = ext4_fs_get_inode_ref(&dir->f.mp->fs, dir->f.inode, &dir_inode);
+	if (r != 0) {
+		goto Finish;
+	}
+
+	r = ext4_dir_iterator_init(&it, &dir_inode, dir->next_off);
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&dir_inode);
+		goto Finish;
+	}
+
+	/* The iterator has no current entry at this offset: report the end
+	 * of the directory instead of dereferencing a nil entry */
+	if (!it.curr) {
+		dir->next_off = EXT4_DIR_ENTRY_OFFSET_TERM;
+		ext4_dir_iterator_fini(&it);
+		ext4_fs_put_inode_ref(&dir_inode);
+		goto Finish;
+	}
+
+	memset(dir->de.name, 0, sizeof(dir->de.name));
+	name_length = ext4_dir_en_get_name_len(&dir->f.mp->fs.sb,
+					       it.curr);
+	/* The length comes from disk; never copy more than the buffer holds */
+	if (name_length > sizeof(dir->de.name))
+		name_length = sizeof(dir->de.name);
+	memcpy(dir->de.name, it.curr->name, name_length);
+
+	/* Directly copying the content isn't safe for Big-endian targets*/
+	dir->de.inode = ext4_dir_en_get_inode(it.curr);
+	dir->de.entry_length = ext4_dir_en_get_entry_len(it.curr);
+	dir->de.name_length = name_length;
+	dir->de.inode_type = ext4_dir_en_get_inode_type(&dir->f.mp->fs.sb,
+						      it.curr);
+
+	de = &dir->de;
+
+	ext4_dir_iterator_next(&it);
+
+	/* Remember where to resume, or mark the directory as exhausted */
+	dir->next_off = it.curr ? it.curr_off : EXT4_DIR_ENTRY_OFFSET_TERM;
+
+	ext4_dir_iterator_fini(&it);
+	ext4_fs_put_inode_ref(&dir_inode);
+
+Finish:
+	EXT4_MP_UNLOCK(dir->f.mp);
+	return de;
+}
+
+/**@brief Restart directory iteration from the first entry.
+ * @param dir directory handle whose read offset is reset to 0
+ */
+void ext4_dir_entry_rewind(ext4_dir *dir)
+{
+	ext4_dir *handle = dir;
+
+	handle->next_off = 0;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_balloc.c
@@ -1,0 +1,617 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_trans.h"
+#include "ext4_balloc.h"
+#include "ext4_super.h"
+#include "ext4_crc32.h"
+#include "ext4_block_group.h"
+#include "ext4_fs.h"
+#include "ext4_bitmap.h"
+#include "ext4_inode.h"
+
+/**@brief Compute the index of the block group containing a block.
+ * @param s     superblock pointer.
+ * @param baddr Absolute address of block.
+ * @return Block group index
+ */
+u32int ext4_balloc_get_bgid_of_block(struct ext4_sblock *s,
+				       u64int baddr)
+{
+	u64int rel = baddr;
+
+	/* With a non-zero first data block, addresses are shifted down by
+	 * one; address 0 is left untouched */
+	if (rel != 0 && ext4_get32(s, first_data_block) != 0)
+		rel--;
+
+	return (u32int)(rel / ext4_get32(s, blocks_per_group));
+}
+
+/**@brief Compute the starting block address of a block group
+ * @param s    superblock pointer.
+ * @param bgid block group index
+ * @return Block address
+ */
+u64int ext4_balloc_get_block_of_bgid(struct ext4_sblock *s,
+				       u32int bgid)
+{
+	u64int baddr = 0;
+	if (ext4_get32(s, first_data_block))
+		baddr++;
+
+	/* Widen bgid before multiplying, so the product is computed in 64
+	 * bits and cannot wrap for block addresses beyond 32 bits */
+	baddr += (u64int)bgid * ext4_get32(s, blocks_per_group);
+	return baddr;
+}
+
+/**@brief Compute the checksum of a block bitmap.
+ * @param sb     superblock pointer
+ * @param bitmap bitmap data
+ * @return crc32c over the file system UUID followed by the bitmap, or 0
+ *         when the metadata checksum feature is not enabled
+ */
+static u32int ext4_balloc_bitmap_csum(struct ext4_sblock *sb,
+					void *bitmap)
+{
+	u32int csum, bitmap_bytes;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return 0;
+
+	/* The bitmap holds one bit per block of the group */
+	bitmap_bytes = ext4_get32(sb, blocks_per_group) / 8;
+
+	/* Seed with the fs uuid, then continue over the bitmap itself */
+	csum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	csum = ext4_crc32c(csum, bitmap, bitmap_bytes);
+	return csum;
+}
+
+/**@brief Store the checksum of a block bitmap in its group descriptor.
+ *
+ * Does nothing when the metadata checksum feature is not enabled.
+ *
+ * @param sb     superblock pointer
+ * @param bg     block group descriptor to update
+ * @param bitmap bitmap data the checksum is computed over
+ */
+void ext4_balloc_set_bitmap_csum(struct ext4_sblock *sb,
+				 struct ext4_bgroup *bg,
+				 void *bitmap)
+{
+	u32int csum;
+	u16int lo, hi;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return;
+
+	csum = ext4_balloc_bitmap_csum(sb, bitmap);
+	lo = to_le16(csum & 0xFFFF);
+	hi = to_le16(csum >> 16);
+
+	/* The upper half is stored only for maximum-size descriptors */
+	bg->block_bitmap_csum_lo = lo;
+	if (ext4_sb_get_desc_size(sb) == EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		bg->block_bitmap_csum_hi = hi;
+}
+
+/**@brief Check a block bitmap against the checksum in its group descriptor.
+ * @param sb     superblock pointer
+ * @param bg     block group descriptor holding the stored checksum
+ * @param bitmap bitmap data
+ * @return true when the checksum matches or the metadata checksum feature
+ *         is not enabled, false otherwise
+ */
+static bool
+ext4_balloc_verify_bitmap_csum(struct ext4_sblock *sb,
+			       struct ext4_bgroup *bg,
+			       void *bitmap)
+{
+	u32int csum;
+	u16int lo, hi;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+
+	csum = ext4_balloc_bitmap_csum(sb, bitmap);
+	lo = to_le16(csum & 0xFFFF);
+	hi = to_le16(csum >> 16);
+
+	if (bg->block_bitmap_csum_lo != lo)
+		return false;
+
+	/* The upper half exists only in maximum-size descriptors */
+	if (ext4_sb_get_desc_size(sb) != EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return true;
+
+	return bg->block_bitmap_csum_hi == hi;
+}
+
+/**@brief Free one block that is accounted to an inode.
+ *
+ * Clears the block's bit in its group bitmap and updates the free-block
+ * counters of the superblock and the group as well as the inode's block
+ * count.
+ *
+ * @param inode_ref inode the block belongs to
+ * @param baddr     absolute address of the block
+ * @return 0 on success, non-zero on failure
+ */
+int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr)
+{
+	struct ext4_fs *fs = inode_ref->fs;
+	struct ext4_sblock *sb = &fs->sb;
+	struct ext4_block_group_ref grp;
+	struct ext4_bgroup *bg;
+	struct ext4_block bmp;
+	ext4_fsblk_t bmp_addr;
+	u64int total_free, inode_blks;
+	u32int gid, bit, blksz, grp_free;
+	int rc;
+
+	gid = ext4_balloc_get_bgid_of_block(sb, baddr);
+	bit = ext4_fs_addr_to_idx_bg(sb, baddr);
+
+	/* Load block group reference */
+	rc = ext4_fs_get_block_group_ref(fs, gid, &grp);
+	if (rc != 0)
+		return rc;
+	bg = grp.block_group;
+
+	/* Load block with bitmap */
+	bmp_addr = ext4_bg_get_block_bitmap(bg, sb);
+	rc = ext4_trans_block_get(fs->bdev, &bmp, bmp_addr);
+	if (rc != 0)
+		goto Fail;
+
+	/* A mismatch is only reported, the bitmap is used regardless */
+	if (!ext4_balloc_verify_bitmap_csum(sb, bg, bmp.data)) {
+		ext4_dbg(DEBUG_BALLOC,
+			DBG_WARN "Bitmap checksum failed."
+			"Group: %ud\n",
+			grp.index);
+	}
+
+	/* Modify bitmap */
+	ext4_bmap_bit_clr(bmp.data, bit);
+	ext4_balloc_set_bitmap_csum(sb, bg, bmp.data);
+	ext4_trans_set_block_dirty(bmp.buf);
+
+	/* Release block with bitmap */
+	rc = ext4_block_set(fs->bdev, &bmp);
+	if (rc != 0)
+		goto Fail;
+
+	blksz = ext4_sb_get_block_size(sb);
+
+	/* Update superblock free blocks count */
+	total_free = ext4_sb_get_free_blocks_cnt(sb);
+	total_free++;
+	ext4_sb_set_free_blocks_cnt(sb, total_free);
+
+	/* Update inode blocks count */
+	inode_blks = ext4_inode_get_blocks_count(sb, inode_ref->inode);
+	inode_blks -= blksz / EXT4_INODE_BLOCK_SIZE;
+	ext4_inode_set_blocks_count(sb, inode_ref->inode, inode_blks);
+	inode_ref->dirty = true;
+
+	/* Update block group free blocks count */
+	grp_free = ext4_bg_get_free_blocks_count(bg, sb);
+	grp_free++;
+	ext4_bg_set_free_blocks_count(bg, sb, grp_free);
+	grp.dirty = true;
+
+	rc = ext4_trans_try_revoke_block(fs->bdev, baddr);
+	if (rc != 0) {
+		grp.dirty = false;
+		goto Fail;
+	}
+	ext4_bcache_invalidate_lba(fs->bdev->bc, baddr, 1);
+
+	/* Release block group reference */
+	return ext4_fs_put_block_group_ref(&grp);
+
+Fail:
+	/* Drop the group reference and report the original error */
+	ext4_fs_put_block_group_ref(&grp);
+	return rc;
+}
+
+/**@brief Free a contiguous run of blocks that is accounted to an inode.
+ *
+ * The run is processed one block group at a time: the bits are cleared in
+ * the group's bitmap and the free-block counters of the superblock and the
+ * group as well as the inode's block count are updated. Afterwards every
+ * block of the run is revoked and dropped from the block cache.
+ *
+ * @param inode_ref inode the blocks belong to
+ * @param first     absolute address of the first block
+ * @param count     number of blocks to free
+ * @return 0 on success, non-zero on failure
+ */
+int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
+			    ext4_fsblk_t first, u32int count)
+{
+	int rc = 0;
+	u32int blk_cnt = count;
+	ext4_fsblk_t start_block = first;
+	struct ext4_fs *fs = inode_ref->fs;
+	struct ext4_sblock *sb = &fs->sb;
+
+	/* Block group of the first block of the run */
+	u32int bg_first = ext4_balloc_get_bgid_of_block(sb, first);
+
+	/* Block group of the last block of the run */
+	u32int bg_last = ext4_balloc_get_bgid_of_block(sb, first + count - 1);
+
+	if (!ext4_sb_feature_incom(sb, EXT4_FINCOM_FLEX_BG)) {
+		/*It is not possible without flex_bg that blocks are continuous
+		 * and the last block belongs to another bg.*/
+		if (bg_last != bg_first) {
+			ext4_dbg(DEBUG_BALLOC, DBG_WARN "FLEX_BG: disabled & "
+				"bg_last: %ud bg_first: %ud\n",
+				bg_last, bg_first);
+		}
+	}
+
+	/* Load block group reference */
+	struct ext4_block_group_ref bg_ref;
+	while (bg_first <= bg_last) {
+
+		rc = ext4_fs_get_block_group_ref(fs, bg_first, &bg_ref);
+		if (rc != 0)
+			return rc;
+
+		struct ext4_bgroup *bg = bg_ref.block_group;
+
+		u32int idx_in_bg_first;
+		idx_in_bg_first = ext4_fs_addr_to_idx_bg(sb, first);
+
+		/* Load block with bitmap */
+		ext4_fsblk_t bitmap_blk = ext4_bg_get_block_bitmap(bg, sb);
+
+		struct ext4_block blk;
+		rc = ext4_trans_block_get(fs->bdev, &blk, bitmap_blk);
+		if (rc != 0) {
+			ext4_fs_put_block_group_ref(&bg_ref);
+			return rc;
+		}
+
+		if (!ext4_balloc_verify_bitmap_csum(sb, bg, blk.data)) {
+			ext4_dbg(DEBUG_BALLOC,
+				DBG_WARN "Bitmap checksum failed."
+				"Group: %ud\n",
+				bg_ref.index);
+		}
+		/* Bits from the start index to the end of the bitmap block.
+		 * NOTE(review): treats a group as block_size * 8 blocks -
+		 * confirm this always equals blocks_per_group */
+		u32int free_cnt;
+		free_cnt = ext4_sb_get_block_size(sb) * 8 - idx_in_bg_first;
+
+		/*If last block, free only count blocks*/
+		free_cnt = count > free_cnt ? free_cnt : count;
+
+		/* Modify bitmap */
+		ext4_bmap_bits_free(blk.data, idx_in_bg_first, free_cnt);
+		ext4_balloc_set_bitmap_csum(sb, bg, blk.data);
+		ext4_trans_set_block_dirty(blk.buf);
+
+		count -= free_cnt;
+		first += free_cnt;
+
+		/* Release block with bitmap */
+		rc = ext4_block_set(fs->bdev, &blk);
+		if (rc != 0) {
+			ext4_fs_put_block_group_ref(&bg_ref);
+			return rc;
+		}
+
+		u32int block_size = ext4_sb_get_block_size(sb);
+
+		/* Update superblock free blocks count */
+		u64int sb_free_blocks = ext4_sb_get_free_blocks_cnt(sb);
+		sb_free_blocks += free_cnt;
+		ext4_sb_set_free_blocks_cnt(sb, sb_free_blocks);
+
+		/* Update inode blocks count */
+		u64int ino_blocks;
+		ino_blocks = ext4_inode_get_blocks_count(sb, inode_ref->inode);
+		ino_blocks -= free_cnt * (block_size / EXT4_INODE_BLOCK_SIZE);
+		ext4_inode_set_blocks_count(sb, inode_ref->inode, ino_blocks);
+		inode_ref->dirty = true;
+
+		/* Update block group free blocks count */
+		u32int free_blocks;
+		free_blocks = ext4_bg_get_free_blocks_count(bg, sb);
+		free_blocks += free_cnt;
+		ext4_bg_set_free_blocks_count(bg, sb, free_blocks);
+		bg_ref.dirty = true;
+
+		/* Release block group reference. Return on failure like the
+		 * other error paths of this loop: falling through would let
+		 * the revoke loop overwrite rc and could trip the assert
+		 * below while blocks are still outstanding */
+		rc = ext4_fs_put_block_group_ref(&bg_ref);
+		if (rc != 0)
+			return rc;
+
+		bg_first++;
+	}
+
+	u32int i;
+	for (i = 0;i < blk_cnt;i++) {
+		rc = ext4_trans_try_revoke_block(fs->bdev, start_block + i);
+		if (rc != 0)
+			return rc;
+
+	}
+
+	ext4_bcache_invalidate_lba(fs->bdev->bc, start_block, blk_cnt);
+	/*All blocks should be released*/
+	assert(count == 0);
+
+	return rc;
+}
+
+/**@brief Allocate one block for an inode, preferably at or near a goal.
+ *
+ * The search order is: the goal block itself, the remaining bits of the
+ * 64-bit aligned window after the goal, the rest of the goal's block group,
+ * and finally every block group in turn.
+ *
+ * @param inode_ref inode the new block is accounted to
+ * @param goal      preferred absolute block address
+ * @param fblock    receives the absolute address of the allocated block
+ * @return 0 on success, non-zero on failure (-1 with "no free blocks" when
+ *         no group yields a free block)
+ */
+int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref,
+			    ext4_fsblk_t goal,
+			    ext4_fsblk_t *fblock)
+{
+	ext4_fsblk_t alloc;
+	ext4_fsblk_t bmp_blk_adr;
+	u32int rel_blk_idx = 0;
+	u64int free_blocks;
+	int r;
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+
+	/* Load block group number for goal and relative index */
+	u32int bg_id = ext4_balloc_get_bgid_of_block(sb, goal);
+	u32int idx_in_bg = ext4_fs_addr_to_idx_bg(sb, goal);
+
+	struct ext4_block b;
+	struct ext4_block_group_ref bg_ref;
+
+	/* Load block group reference */
+	r = ext4_fs_get_block_group_ref(inode_ref->fs, bg_id, &bg_ref);
+	if (r != 0)
+		return r;
+
+	struct ext4_bgroup *bg = bg_ref.block_group;
+
+	free_blocks = ext4_bg_get_free_blocks_count(bg_ref.block_group, sb);
+	if (free_blocks == 0) {
+		/* This group has no free blocks */
+		goto goal_failed;
+	}
+
+	/* Compute indexes */
+	ext4_fsblk_t first_in_bg;
+	first_in_bg = ext4_balloc_get_block_of_bgid(sb, bg_ref.index);
+
+	u32int first_in_bg_index;
+	first_in_bg_index = ext4_fs_addr_to_idx_bg(sb, first_in_bg);
+
+	/* Never start the search before the first block of the group */
+	if (idx_in_bg < first_in_bg_index)
+		idx_in_bg = first_in_bg_index;
+
+	/* Load block with bitmap */
+	bmp_blk_adr = ext4_bg_get_block_bitmap(bg_ref.block_group, sb);
+
+	r = ext4_trans_block_get(inode_ref->fs->bdev, &b, bmp_blk_adr);
+	if (r != 0) {
+		ext4_fs_put_block_group_ref(&bg_ref);
+		return r;
+	}
+
+	/* A mismatch is only reported, the bitmap is used regardless */
+	if (!ext4_balloc_verify_bitmap_csum(sb, bg, b.data)) {
+		ext4_dbg(DEBUG_BALLOC,
+			DBG_WARN "Bitmap checksum failed."
+			"Group: %ud\n",
+			bg_ref.index);
+	}
+
+	/* Check if goal is free */
+	if (ext4_bmap_is_bit_clr(b.data, idx_in_bg)) {
+		ext4_bmap_bit_set(b.data, idx_in_bg);
+		ext4_balloc_set_bitmap_csum(sb, bg_ref.block_group,
+					    b.data);
+		ext4_trans_set_block_dirty(b.buf);
+		r = ext4_block_set(inode_ref->fs->bdev, &b);
+		if (r != 0) {
+			ext4_fs_put_block_group_ref(&bg_ref);
+			return r;
+		}
+
+		alloc = ext4_fs_bg_idx_to_addr(sb, idx_in_bg, bg_id);
+		goto success;
+	}
+
+	u32int blk_in_bg = ext4_blocks_in_group_cnt(sb, bg_id);
+
+	/* End of the nearby search: the goal index rounded up to a multiple
+	 * of 64, capped at the number of blocks in the group */
+	u32int end_idx = (idx_in_bg + 63) & ~63;
+	if (end_idx > blk_in_bg)
+		end_idx = blk_in_bg;
+
+	/* Try to find free block near to goal */
+	u32int tmp_idx;
+	for (tmp_idx = idx_in_bg + 1; tmp_idx < end_idx; ++tmp_idx) {
+		if (ext4_bmap_is_bit_clr(b.data, tmp_idx)) {
+			ext4_bmap_bit_set(b.data, tmp_idx);
+
+			ext4_balloc_set_bitmap_csum(sb, bg, b.data);
+			ext4_trans_set_block_dirty(b.buf);
+			r = ext4_block_set(inode_ref->fs->bdev, &b);
+			if (r != 0) {
+				ext4_fs_put_block_group_ref(&bg_ref);
+				return r;
+			}
+
+			alloc = ext4_fs_bg_idx_to_addr(sb, tmp_idx, bg_id);
+			goto success;
+		}
+	}
+
+	/* Find free bit in bitmap, from the goal index to the end of the group */
+	bool no_space;
+	r = ext4_bmap_bit_find_clr(b.data, idx_in_bg, blk_in_bg, &rel_blk_idx, &no_space);
+	if (r == 0) {
+		ext4_bmap_bit_set(b.data, rel_blk_idx);
+		ext4_balloc_set_bitmap_csum(sb, bg_ref.block_group, b.data);
+		ext4_trans_set_block_dirty(b.buf);
+		r = ext4_block_set(inode_ref->fs->bdev, &b);
+		if (r != 0) {
+			ext4_fs_put_block_group_ref(&bg_ref);
+			return r;
+		}
+
+		alloc = ext4_fs_bg_idx_to_addr(sb, rel_blk_idx, bg_id);
+		goto success;
+	}
+
+	/* No free block found yet */
+	r = ext4_block_set(inode_ref->fs->bdev, &b);
+	if (r != 0) {
+		ext4_fs_put_block_group_ref(&bg_ref);
+		return r;
+	}
+
+/* Reached when the goal group has no free blocks or no clear bit was found
+ * in it; the goal group's reference is released before moving on */
+goal_failed:
+
+	r = ext4_fs_put_block_group_ref(&bg_ref);
+	if (r != 0)
+		return r;
+
+	/* Try other block groups: start right after the goal group and wrap
+	 * around; count limits the scan to one visit per group, the goal
+	 * group being visited again last */
+	u32int block_group_count = ext4_block_group_cnt(sb);
+	u32int bgid = (bg_id + 1) % block_group_count;
+	u32int count = block_group_count;
+
+	while (count > 0) {
+		r = ext4_fs_get_block_group_ref(inode_ref->fs, bgid, &bg_ref);
+		if (r != 0)
+			return r;
+
+		struct ext4_bgroup *bg = bg_ref.block_group;
+		free_blocks = ext4_bg_get_free_blocks_count(bg, sb);
+		if (free_blocks == 0) {
+			/* This group has no free blocks */
+			goto next_group;
+		}
+
+		/* Load block with bitmap */
+		bmp_blk_adr = ext4_bg_get_block_bitmap(bg, sb);
+		r = ext4_trans_block_get(inode_ref->fs->bdev, &b, bmp_blk_adr);
+		if (r != 0) {
+			ext4_fs_put_block_group_ref(&bg_ref);
+			return r;
+		}
+
+		if (!ext4_balloc_verify_bitmap_csum(sb, bg, b.data)) {
+			ext4_dbg(DEBUG_BALLOC,
+				DBG_WARN "Bitmap checksum failed."
+				"Group: %ud\n",
+				bg_ref.index);
+		}
+
+		/* Compute indexes: the search starts at the group's first block */
+		first_in_bg = ext4_balloc_get_block_of_bgid(sb, bgid);
+		idx_in_bg = ext4_fs_addr_to_idx_bg(sb, first_in_bg);
+		blk_in_bg = ext4_blocks_in_group_cnt(sb, bgid);
+		first_in_bg_index = ext4_fs_addr_to_idx_bg(sb, first_in_bg);
+
+		if (idx_in_bg < first_in_bg_index)
+			idx_in_bg = first_in_bg_index;
+
+		bool no_space;
+		r = ext4_bmap_bit_find_clr(b.data, idx_in_bg, blk_in_bg, &rel_blk_idx, &no_space);
+		if (r == 0) {
+			ext4_bmap_bit_set(b.data, rel_blk_idx);
+			ext4_balloc_set_bitmap_csum(sb, bg, b.data);
+			ext4_trans_set_block_dirty(b.buf);
+			r = ext4_block_set(inode_ref->fs->bdev, &b);
+			if (r != 0) {
+				ext4_fs_put_block_group_ref(&bg_ref);
+				return r;
+			}
+
+			alloc = ext4_fs_bg_idx_to_addr(sb, rel_blk_idx, bgid);
+			goto success;
+		}
+
+		r = ext4_block_set(inode_ref->fs->bdev, &b);
+		if (r != 0) {
+			ext4_fs_put_block_group_ref(&bg_ref);
+			return r;
+		}
+
+	next_group:
+		r = ext4_fs_put_block_group_ref(&bg_ref);
+		if (r != 0) {
+			return r;
+		}
+
+		/* Goto next group */
+		bgid = (bgid + 1) % block_group_count;
+		count--;
+	}
+
+	werrstr("no free blocks");
+	return -1;
+
+/* Every path arriving here has set alloc, has already released the bitmap
+ * block and still holds bg_ref for the group the block was taken from */
+success:
+    /* Empty statement: a label must be followed by a statement */
+    ;
+
+	u32int block_size = ext4_sb_get_block_size(sb);
+
+	/* Update superblock free blocks count */
+	u64int sb_free_blocks = ext4_sb_get_free_blocks_cnt(sb);
+	sb_free_blocks--;
+	ext4_sb_set_free_blocks_cnt(sb, sb_free_blocks);
+
+	/* Update inode blocks count, which is kept in units of
+	 * EXT4_INODE_BLOCK_SIZE rather than file system blocks */
+	u64int ino_blocks = ext4_inode_get_blocks_count(sb, inode_ref->inode);
+	ino_blocks += block_size / EXT4_INODE_BLOCK_SIZE;
+	ext4_inode_set_blocks_count(sb, inode_ref->inode, ino_blocks);
+	inode_ref->dirty = true;
+
+	/* Update block group free blocks count */
+
+	u32int fb_cnt = ext4_bg_get_free_blocks_count(bg_ref.block_group, sb);
+	fb_cnt--;
+	ext4_bg_set_free_blocks_count(bg_ref.block_group, sb, fb_cnt);
+
+	bg_ref.dirty = true;
+	r = ext4_fs_put_block_group_ref(&bg_ref);
+
+	/* The address is stored even when releasing the group failed */
+	*fblock = alloc;
+	return r;
+}
+
+/**@brief Try to allocate one specific block for an inode.
+ * @param inode_ref inode the block is accounted to
+ * @param baddr     absolute address of the wanted block
+ * @param free      set to true when the block was free and has now been
+ *                  allocated, false when it was already in use
+ * @return 0 on success (whether or not the block was free), non-zero on
+ *         failure
+ */
+int ext4_balloc_try_alloc_block(struct ext4_inode_ref *inode_ref,
+				ext4_fsblk_t baddr, bool *free)
+{
+	struct ext4_fs *fs = inode_ref->fs;
+	struct ext4_sblock *sb = &fs->sb;
+	struct ext4_block_group_ref grp;
+	struct ext4_block bmp;
+	ext4_fsblk_t bmp_addr;
+	u64int total_free, inode_blks;
+	u32int gid, bit, blksz, grp_free;
+	int rc;
+
+	/* Group holding baddr and the block's bit inside that group */
+	gid = ext4_balloc_get_bgid_of_block(sb, baddr);
+	bit = ext4_fs_addr_to_idx_bg(sb, baddr);
+
+	/* Load block group reference */
+	rc = ext4_fs_get_block_group_ref(fs, gid, &grp);
+	if (rc != 0)
+		return rc;
+
+	/* Load block with bitmap */
+	bmp_addr = ext4_bg_get_block_bitmap(grp.block_group, sb);
+	rc = ext4_trans_block_get(fs->bdev, &bmp, bmp_addr);
+	if (rc != 0) {
+		ext4_fs_put_block_group_ref(&grp);
+		return rc;
+	}
+
+	if (!ext4_balloc_verify_bitmap_csum(sb, grp.block_group, bmp.data)) {
+		ext4_dbg(DEBUG_BALLOC,
+			DBG_WARN "Bitmap checksum failed."
+			"Group: %ud\n",
+			grp.index);
+	}
+
+	/* Claim the block in the bitmap when it is free */
+	*free = ext4_bmap_is_bit_clr(bmp.data, bit);
+	if (*free) {
+		ext4_bmap_bit_set(bmp.data, bit);
+		ext4_balloc_set_bitmap_csum(sb, grp.block_group, bmp.data);
+		ext4_trans_set_block_dirty(bmp.buf);
+	}
+
+	/* Release block with bitmap */
+	rc = ext4_block_set(fs->bdev, &bmp);
+	if (rc != 0) {
+		/* Error in saving bitmap */
+		ext4_fs_put_block_group_ref(&grp);
+		return rc;
+	}
+
+	/* Counters change only when the block was actually taken */
+	if (*free) {
+		blksz = ext4_sb_get_block_size(sb);
+
+		/* Update superblock free blocks count */
+		total_free = ext4_sb_get_free_blocks_cnt(sb);
+		total_free--;
+		ext4_sb_set_free_blocks_cnt(sb, total_free);
+
+		/* Update inode blocks count */
+		inode_blks = ext4_inode_get_blocks_count(sb, inode_ref->inode);
+		inode_blks += blksz / EXT4_INODE_BLOCK_SIZE;
+		ext4_inode_set_blocks_count(sb, inode_ref->inode, inode_blks);
+		inode_ref->dirty = true;
+
+		/* Update block group free blocks count */
+		grp_free = ext4_bg_get_free_blocks_count(grp.block_group, sb);
+		grp_free--;
+		ext4_bg_set_free_blocks_count(grp.block_group, sb, grp_free);
+
+		grp.dirty = true;
+	}
+
+	return ext4_fs_put_block_group_ref(&grp);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_bcache.c
@@ -1,0 +1,286 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_bcache.h"
+#include "ext4_blockdev.h"
+#include "ext4_debug.h"
+
+/* RB-tree ordering callback for lba_root: compares two buffers by LBA.
+ * Yields -1, 0 or 1 when a sorts before, equal to, or after b. */
+static int ext4_bcache_lba_compare(struct ext4_buf *a, struct ext4_buf *b)
+{
+	if (a->lba < b->lba)
+		return -1;
+	return a->lba > b->lba;
+}
+
+/* RB-tree ordering callback for lru_root: compares two buffers by lru_id.
+ * Yields -1, 0 or 1 when a sorts before, equal to, or after b. */
+static int ext4_bcache_lru_compare(struct ext4_buf *a, struct ext4_buf *b)
+{
+	if (a->lru_id != b->lru_id)
+		return a->lru_id > b->lru_id ? 1 : -1;
+	return 0;
+}
+
+RB_GENERATE_INTERNAL(ext4_buf_lba, ext4_buf, lba_node,
+		     ext4_bcache_lba_compare, static inline) /* tree of ext4_buf linked via lba_node, ordered by LBA */
+RB_GENERATE_INTERNAL(ext4_buf_lru, ext4_buf, lru_node,
+		     ext4_bcache_lru_compare, static inline) /* tree of ext4_buf linked via lru_node, ordered by lru_id */
+
+/* Zero *bc, then record its capacity (cnt buffers) and the size in bytes
+ * of each buffer's data area (itemsize). Always returns 0. */
+int ext4_bcache_init_dynamic(struct ext4_bcache *bc, u32int cnt,
+			     u32int itemsize)
+{
+	assert(bc && cnt && itemsize);
+	memset(bc, 0, sizeof(*bc));
+
+	bc->itemsize = itemsize;
+	bc->cnt = cnt;
+	bc->max_ref_blocks = 0;	/* usage statistics start from scratch */
+	bc->ref_blocks = 0;
+	return 0;
+}
+
+void ext4_bcache_cleanup(struct ext4_bcache *bc)
+{
+	struct ext4_buf *cur, *next;	/* next: saved successor, since cur is freed */
+	RB_FOREACH_SAFE(cur, ext4_buf_lba, &bc->lba_root, next) {
+		ext4_block_flush_buf(bc->bdev, cur);	/* flush result is not checked */
+		ext4_bcache_drop_buf(bc, cur);
+	}
+}
+
+int ext4_bcache_fini_dynamic(struct ext4_bcache *bc)
+{
+	memset(bc, 0, sizeof *bc);	/* only the descriptor is wiped; no buffer is released here */
+	return 0;
+}
+
+/**@brief:
+ *
+ *  This is ext4_bcache, the module handling basic buffer-cache stuff.
+ *
+ *  Buffers in a bcache are sorted by their LBA and stored in a
+ *  RB-Tree(lba_root).
+ *
+ *  Bcache also maintains another RB-Tree(lru_root) right now, where
+ *  buffers are sorted by their LRU id.
+ *
+ *  A singly-linked list is used to track those dirty buffers which are
+ *  ready to be flushed. (Those buffers which are dirty but also referenced
+ *  are not considered ready to be flushed.)
+ *
+ *  When a buffer is not referenced, it will be stored in both lba_root
+ *  and lru_root, while it will only be stored in lba_root when it is
+ *  referenced.
+ */
+
+/* Allocate a buffer descriptor (via ext4_calloc) for lba together with a
+ * data area of bc->itemsize bytes; yields nil when either allocation fails. */
+static struct ext4_buf *
+ext4_buf_alloc(struct ext4_bcache *bc, u64int lba)
+{
+	struct ext4_buf *desc;
+	void *payload = ext4_malloc(bc->itemsize);
+
+	if (payload == nil)
+		return nil;
+	desc = ext4_calloc(1, sizeof(*desc));
+	if (desc == nil) {
+		ext4_free(payload);	/* do not leak the data area */
+		return nil;
+	}
+	desc->bc = bc;
+	desc->data = payload;
+	desc->lba = lba;
+	return desc;
+}
+
+static void ext4_buf_free(struct ext4_buf *buf)
+{
+	ext4_free(buf->data);	/* release the data area first ... */
+	ext4_free(buf);	/* ... then the descriptor that pointed to it */
+}
+
+/* Exact-match search of the LBA tree; the result of RB_FIND is handed
+ * back unchanged (no buffer is returned when lba is not cached). */
+static struct ext4_buf *
+ext4_buf_lookup(struct ext4_bcache *bc, u64int lba)
+{
+	/* Throwaway probe node; the LBA comparator only reads its lba. */
+	struct ext4_buf probe = { .lba = lba };
+	return RB_FIND(ext4_buf_lba, &bc->lba_root, &probe);
+}
+
+struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc)
+{
+	return RB_MIN(ext4_buf_lru, &bc->lru_root);	/* minimum of the tree ordered by lru_id */
+}
+
+void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf)
+{
+	/* Warn when dropping a referenced buffer; it is dropped regardless. */
+	if (buf->refctr) {
+		ext4_dbg(DEBUG_BCACHE, DBG_WARN "Buffer is still referenced. "
+				"lba: %llud, refctr: %ud\n",
+				buf->lba, buf->refctr);
+	} else
+		RB_REMOVE(ext4_buf_lru, &bc->lru_root, buf);	/* LRU unlink only when unreferenced */
+	/* Every buffer is unlinked from the LBA tree. */
+	RB_REMOVE(ext4_buf_lba, &bc->lba_root, buf);
+
+	/* A dirty buffer is taken off the dirty list here without being flushed. */
+	if (ext4_bcache_test_flag(buf, BC_DIRTY))
+		ext4_bcache_remove_dirty_node(bc, buf);
+	/* Free the memory and account for one buffer fewer in the cache. */
+	ext4_buf_free(buf);
+	bc->ref_blocks--;
+}
+
+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
+				struct ext4_buf *buf)
+{
+	/* Forget any pending write-completion callback. */
+	buf->end_write_arg = nil;
+	buf->end_write = nil;
+	/* Take a dirty buffer off the dirty list, then reset its state via
+	 * ext4_bcache_clear_dirty() (presumably dirty + up-to-date flags). */
+	if (ext4_bcache_test_flag(buf, BC_DIRTY))
+		ext4_bcache_remove_dirty_node(bc, buf);
+	ext4_bcache_clear_dirty(buf);
+}
+
+void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,
+				u64int from,
+				u32int cnt)
+{
+	/* NOTE(review): the walk starts at an exact match for from; a miss skips the whole range -- confirm. */
+	struct ext4_buf *next = ext4_buf_lookup(bc, from), *cur;
+	u64int last = from + cnt - 1;	/* inclusive end of the LBA range */
+	RB_FOREACH_FROM(cur, ext4_buf_lba, next) {
+		if (cur->lba > last)
+			break;
+		ext4_bcache_invalidate_buf(bc, cur);
+	}
+}
+
+/* Look lba up in the cache. On a hit the buffer gains a reference and *b is
+ * filled in; a buffer that was unreferenced first leaves the LRU tree (with
+ * a fresh lru_id) and the dirty list. Yields nil, leaving *b alone, on a miss. */
+struct ext4_buf *
+ext4_bcache_find_get(struct ext4_bcache *bc, struct ext4_block *b,
+		     u64int lba)
+{
+	struct ext4_buf *hit = ext4_buf_lookup(bc, lba);
+
+	if (hit == nil)
+		return nil;
+
+	if (hit->refctr == 0) {
+		hit->lru_id = ++bc->lru_ctr;
+		RB_REMOVE(ext4_buf_lru, &bc->lru_root, hit);
+		if (ext4_bcache_test_flag(hit, BC_DIRTY))
+			ext4_bcache_remove_dirty_node(bc, hit);
+	}
+	ext4_bcache_inc_ref(hit);
+
+	b->data = hit->data;
+	b->buf = hit;
+	b->lb_id = lba;
+	return hit;
+}
+
+/* Bind *b to the buffer caching b->lb_id, creating the buffer when absent.
+ * *is_new reports whether a fresh buffer was created. The buffer comes back
+ * referenced. Returns 0, or -1 with the error string "memory". */
+int ext4_bcache_alloc(struct ext4_bcache *bc, struct ext4_block *b,
+		      bool *is_new)
+{
+	struct ext4_buf *fresh;
+
+	/* Fast path: a buffer with this exact LBA already exists. */
+	if (ext4_bcache_find_get(bc, b, b->lb_id) != nil) {
+		*is_new = false;
+		return 0;
+	}
+
+	/* Miss: a new buffer has to be created. */
+	fresh = ext4_buf_alloc(bc, b->lb_id);
+	if (fresh == nil) {
+		werrstr("memory");
+		return -1;
+	}
+
+	RB_INSERT(ext4_buf_lba, &bc->lba_root, fresh);
+
+	/* Account for the new buffer and track the high-water mark. */
+	bc->ref_blocks++;
+	if (bc->ref_blocks > bc->max_ref_blocks)
+		bc->max_ref_blocks = bc->ref_blocks;
+
+	/* Hand out the first reference and stamp a new LRU id. */
+	ext4_bcache_inc_ref(fresh);
+	fresh->lru_id = ++bc->lru_ctr;
+
+	b->data = fresh->data;
+	b->buf = fresh;
+
+	*is_new = true;
+	return 0;
+}
+
+/* Release the reference *b holds on its buffer. The last release parks the
+ * buffer in the LRU tree, then queues or flushes it if dirty, or drops it if
+ * stale/temporary. Returns 0, or -1 ("invalid block id") if b->lb_id is 0. */
+int ext4_bcache_free(struct ext4_bcache *bc, struct ext4_block *b)
+{
+	struct ext4_buf *buf;
+
+	/* Validate the arguments before b is dereferenced. */
+	assert(bc && b);
+	buf = b->buf;
+
+	/*Check if valid.*/
+	if (!b->lb_id) {
+		werrstr("invalid block id");
+		return -1;
+	}
+
+	/* The block must point at a buffer that is still referenced. */
+	assert(buf);
+	assert(buf->refctr);
+
+	ext4_bcache_dec_ref(buf);
+
+	/* We are the last one touching this buffer, do the cleanups. */
+	if (!buf->refctr) {
+		RB_INSERT(ext4_buf_lru, &bc->lru_root, buf);
+		/* Dirty and up to date: queue it in write-back mode, else flush now. */
+		if (ext4_bcache_test_flag(buf, BC_DIRTY) &&
+		    ext4_bcache_test_flag(buf, BC_UPTODATE)) {
+			if (bc->bdev->cache_write_back &&
+			    !ext4_bcache_test_flag(buf, BC_FLUSH) &&
+			    !ext4_bcache_test_flag(buf, BC_TMP))
+				ext4_bcache_insert_dirty_node(bc, buf);
+			else {
+				ext4_block_flush_buf(bc->bdev, buf);
+				ext4_bcache_clear_flag(buf, BC_FLUSH);
+			}
+		}
+
+		/* The buffer is invalidated (or temporary)...drop it. */
+		if (!ext4_bcache_test_flag(buf, BC_UPTODATE) ||
+		    ext4_bcache_test_flag(buf, BC_TMP))
+			ext4_bcache_drop_buf(bc, buf);
+	}
+
+	b->lb_id = 0;
+	b->data = 0;
+	return 0;
+}
+
+bool ext4_bcache_is_full(struct ext4_bcache *bc)
+{
+	return bc->ref_blocks >= bc->cnt;	/* buffers currently held vs. configured capacity */
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_bitmap.c
@@ -1,0 +1,84 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_bitmap.h"
+
+/* Clear bcnt consecutive bits of bmap, starting at bit index sbit. */
+void ext4_bmap_bits_free(u8int *bmap, u32int sbit, u32int bcnt)
+{
+	u32int i = sbit;
+
+	/* Head: clear bit by bit up to the next byte boundary. */
+	while (i & 7) {
+		if (!bcnt)
+			return;
+		ext4_bmap_bit_clr(bmap, i);
+		bcnt--;
+		i++;
+	}
+
+	/* Middle: wipe the whole bytes covered by the remaining count. */
+	bmap += i >> 3;
+	memset(bmap, 0, bcnt >> 3);
+	bmap += bcnt >> 3;
+
+	/* Tail: only the bits left over after the whole bytes (at most 7). */
+	bcnt &= 7;
+	for (i = 0; i < bcnt; ++i)
+		ext4_bmap_bit_clr(bmap, i);
+}
+
+/* Find the first clear bit in bits [sbit, ebit) of bmap. Returns 0 with the
+ * index in *bit_id, or -1 with *no_space set when every bit is taken. */
+int ext4_bmap_bit_find_clr(u8int *bmap, u32int sbit, u32int ebit,
+			   u32int *bit_id, bool *no_space)
+{
+	u32int i = sbit;
+	u32int bcnt = ebit - sbit;
+
+	*no_space = false;
+
+	/* Head: test bit by bit until i reaches a byte boundary. */
+	while (i & 7) {
+		if (!bcnt)
+			goto Nospace;
+		if (ext4_bmap_is_bit_clr(bmap, i)) {
+			*bit_id = i;	/* the bit actually found, not the start bit */
+			return 0;
+		}
+		i++;
+		bcnt--;
+	}
+
+	sbit = i;
+	bmap += (sbit >> 3);
+
+	/* Middle: skip full bytes; scan a byte only if it has a clear bit. */
+	while (bcnt >= 8) {
+		if (*bmap != 0xFF) {
+			for (i = 0; i < 8; ++i) {
+				if (ext4_bmap_is_bit_clr(bmap, i)) {
+					*bit_id = sbit + i;
+					return 0;
+				}
+			}
+		}
+
+		bmap += 1;
+		bcnt -= 8;
+		sbit += 8;
+	}
+
+	/* Tail: fewer than 8 bits remain in the last byte. */
+	for (i = 0; i < bcnt; ++i) {
+		if (ext4_bmap_is_bit_clr(bmap, i)) {
+			*bit_id = sbit + i;
+			return 0;
+		}
+	}
+
+Nospace:
+	*no_space = true;
+	return -1;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_block_group.c
@@ -1,0 +1,47 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_block_group.h"
+
+/**@brief CRC-16 look up table*/
+static u16int const crc16_tab[256] = {
+    0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, 0xC601,
+    0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, 0xCC01, 0x0CC0,
+    0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, 0x0A00, 0xCAC1, 0xCB81,
+    0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, 0xD801, 0x18C0, 0x1980, 0xD941,
+    0x1B00, 0xDBC1, 0xDA81, 0x1A40, 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01,
+    0x1DC0, 0x1C80, 0xDC41, 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0,
+    0x1680, 0xD641, 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081,
+    0x1040, 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
+    0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, 0x3C00,
+    0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, 0xFA01, 0x3AC0,
+    0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, 0x2800, 0xE8C1, 0xE981,
+    0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, 0xEE01, 0x2EC0, 0x2F80, 0xEF41,
+    0x2D00, 0xEDC1, 0xEC81, 0x2C40, 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700,
+    0xE7C1, 0xE681, 0x2640, 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0,
+    0x2080, 0xE041, 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281,
+    0x6240, 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
+    0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, 0xAA01,
+    0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, 0x7800, 0xB8C1,
+    0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, 0xBE01, 0x7EC0, 0x7F80,
+    0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, 0xB401, 0x74C0, 0x7580, 0xB541,
+    0x7700, 0xB7C1, 0xB681, 0x7640, 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101,
+    0x71C0, 0x7080, 0xB041, 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0,
+    0x5280, 0x9241, 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481,
+    0x5440, 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
+    0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, 0x8801,
+    0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, 0x4E00, 0x8EC1,
+    0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, 0x4400, 0x84C1, 0x8581,
+    0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, 0x8201, 0x42C0, 0x4380, 0x8341,
+    0x4100, 0x81C1, 0x8081, 0x4040};
+
+/* Fold len bytes of buffer into the running CRC-16 value crc via crc16_tab. */
+u16int ext4_bg_crc16(u16int crc, const u8int *buffer, usize len)
+{
+	usize k;
+	for (k = 0; k < len; k++)
+		crc = (((crc >> 8) & 0xffU) ^
+		       crc16_tab[(crc ^ buffer[k]) & 0xffU]) & 0x0000ffffU;
+	return crc;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_blockdev.c
@@ -1,0 +1,443 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_blockdev.h"
+#include "ext4_fs.h"
+#include "ext4_journal.h"
+
+static char Eoorop[] = "out of range operation";	/* error string for byte ranges that exceed part_size */
+
+/* Call the interface's lock hook, if one is set; a failing hook trips an assertion. */
+static void ext4_bdif_lock(struct ext4_blockdev *bdev)
+{
+	if (bdev->bdif->lock != nil) {
+		int rc = bdev->bdif->lock(bdev);
+		assert(rc == 0);
+	}
+}
+
+/* Call the interface's unlock hook, if one is set; a failing hook trips an assertion. */
+static void ext4_bdif_unlock(struct ext4_blockdev *bdev)
+{
+	if (bdev->bdif->unlock != nil) {
+		int rc = bdev->bdif->unlock(bdev);
+		assert(rc == 0);
+	}
+}
+
+static int ext4_bdif_bread(struct ext4_blockdev *bdev, void *buf,
+			   u64int blk_id, u32int blk_cnt)
+{
+	ext4_bdif_lock(bdev);	/* hook call and counter update happen between lock and unlock */
+	int r = bdev->bdif->bread(bdev, buf, blk_id, blk_cnt);
+	bdev->bdif->bread_ctr++;	/* counts calls, including ones that failed */
+	ext4_bdif_unlock(bdev);
+	return r;	/* status of the bread hook, passed through */
+}
+
+static int ext4_bdif_bwrite(struct ext4_blockdev *bdev, const void *buf,
+			    u64int blk_id, u32int blk_cnt)
+{
+	ext4_bdif_lock(bdev);	/* hook call and counter update happen between lock and unlock */
+	int r = bdev->bdif->bwrite(bdev, buf, blk_id, blk_cnt);
+	bdev->bdif->bwrite_ctr++;	/* counts calls, including ones that failed */
+	ext4_bdif_unlock(bdev);
+	return r;	/* status of the bwrite hook, passed through */
+}
+
+/* Open the underlying device on first use; later calls only bump the
+ * interface's reference count. Returns 0 or the open hook's error. */
+int ext4_block_init(struct ext4_blockdev *bdev)
+{
+	int rc;
+	assert(bdev);
+	assert(bdev->bdif);
+	assert(bdev->bdif->open &&
+		   bdev->bdif->close &&
+		   bdev->bdif->bread &&
+		   bdev->bdif->bwrite);
+
+	/* Already open: just take another reference. */
+	if (bdev->bdif->ph_refctr != 0) {
+		bdev->bdif->ph_refctr++;
+		return 0;
+	}
+
+	rc = bdev->bdif->open(bdev);
+	if (rc == 0)
+		bdev->bdif->ph_refctr = 1;
+	return rc;
+}
+
+int ext4_block_bind_bcache(struct ext4_blockdev *bdev, struct ext4_bcache *bc)
+{
+	assert(bdev && bc);
+	bc->bdev = bdev;	/* cross-link: each side points at the other */
+	bdev->bc = bc;
+	return 0;	/* no failure path */
+}
+
+void ext4_block_set_lb_size(struct ext4_blockdev *bdev, u32int lb_bsize)
+{
+	/* The logical block size must be a multiple of the physical one. */
+	assert((lb_bsize % bdev->bdif->ph_bsize) == 0);
+	/* Logical block count: the partition size in whole logical blocks. */
+	bdev->lg_bcnt = bdev->part_size / lb_bsize;
+	bdev->lg_bsize = lb_bsize;
+}
+
+/* Drop one reference on the device; the close hook runs when the last
+ * reference goes away. A device that is not open is left alone (returns 0). */
+int ext4_block_fini(struct ext4_blockdev *bdev)
+{
+	assert(bdev);
+
+	if (bdev->bdif->ph_refctr == 0)
+		return 0;
+	if (--bdev->bdif->ph_refctr != 0)
+		return 0;
+
+	/* Last user gone: low-level close. */
+	return bdev->bdif->close(bdev);
+}
+
+/* Write buf back to the device if it is both dirty and up to date.
+ * After a successful write the buffer leaves the dirty list and loses its
+ * dirty flag. The end_write callback, if set, is told the write status in
+ * either case, with bc->dont_shake raised while it runs.
+ * Returns 0 when nothing had to be written or the write succeeded,
+ * otherwise the write error. */
+int ext4_block_flush_buf(struct ext4_blockdev *bdev, struct ext4_buf *buf)
+{
+	struct ext4_bcache *bc = bdev->bc;
+	int status;
+
+	if (!ext4_bcache_test_flag(buf, BC_DIRTY) ||
+	    !ext4_bcache_test_flag(buf, BC_UPTODATE))
+		return 0;
+
+	status = ext4_blocks_set_direct(bdev, buf->data, buf->lba, 1);
+	if (status == 0) {
+		ext4_bcache_remove_dirty_node(bc, buf);
+		ext4_bcache_clear_flag(buf, BC_DIRTY);
+	}
+
+	if (buf->end_write) {
+		bc->dont_shake = true;
+		buf->end_write(bc, buf, status, buf->end_write_arg);
+		bc->dont_shake = false;
+	}
+	return status;
+}
+
+/* Flush the cached buffer for lba; yields 0 when lba is not cached. */
+int ext4_block_flush_lba(struct ext4_blockdev *bdev, u64int lba)
+{
+	struct ext4_block blk;
+	struct ext4_buf *hit = ext4_bcache_find_get(bdev->bc, &blk, lba);
+	int rc;
+	if (hit == nil)
+		return 0;
+	rc = ext4_block_flush_buf(bdev, hit);
+	ext4_bcache_free(bdev->bc, &blk);	/* give back the reference taken by find_get */
+	return rc;
+}
+
+/* While the cache is full, evict the buffer with the lowest LRU id, flushing
+ * it first if dirty. Does nothing when dont_shake is already set.
+ * Returns 0, or the error of the flush that stopped the eviction. */
+int ext4_block_cache_shake(struct ext4_blockdev *bdev)
+{
+	struct ext4_bcache *bc = bdev->bc;
+	struct ext4_buf *victim;
+	int rc = 0;
+
+	if (bc->dont_shake)
+		return 0;
+
+	bc->dont_shake = true;
+	while (!RB_EMPTY(&bc->lru_root) && ext4_bcache_is_full(bc)) {
+		victim = ext4_buf_lowest_lru(bc);
+		assert(victim);
+		if (ext4_bcache_test_flag(victim, BC_DIRTY)) {
+			rc = ext4_block_flush_buf(bdev, victim);
+			if (rc != 0)
+				break;
+		}
+		ext4_bcache_drop_buf(bc, victim);
+	}
+	bc->dont_shake = false;
+	return rc;
+}
+
+/* Obtain a referenced cache buffer for logical block lba without reading the
+ * device; a full cache is shaken first. Fails with -1 (Eio) when the device
+ * is not open or lba is not below lg_bcnt; shake/alloc errors are passed on. */
+int ext4_block_get_noread(struct ext4_blockdev *bdev, struct ext4_block *b,
+			  u64int lba)
+{
+	bool fresh;
+	int rc;
+
+	assert(bdev && b);
+
+	if (bdev->bdif->ph_refctr == 0 || lba >= bdev->lg_bcnt) {
+		werrstr(Eio);
+		return -1;
+	}
+
+	b->lb_id = lba;
+
+	/* Make room first: a full cache must (flush and) drop something. */
+	rc = ext4_block_cache_shake(bdev);
+	if (rc == 0)
+		rc = ext4_bcache_alloc(bdev->bc, b, &fresh);
+	if (rc != 0)
+		return rc;
+
+	if (b->data == nil) {
+		werrstr("memory");
+		return -1;
+	}
+	return 0;
+}
+
+/* Like ext4_block_get_noread(), but also makes sure the buffer holds the
+ * on-device contents: a buffer not yet up to date is read from the device
+ * and marked BC_UPTODATE. On a read error the buffer reference is released. */
+int ext4_block_get(struct ext4_blockdev *bdev, struct ext4_block *b,
+		   u64int lba)
+{
+	int rc = ext4_block_get_noread(bdev, b, lba);
+	if (rc != 0)
+		return rc;
+
+	/* Cached data is current: no device access needed. */
+	if (ext4_bcache_test_flag(b->buf, BC_UPTODATE))
+		return 0;
+
+	rc = ext4_blocks_get_direct(bdev, b->data, lba, 1);
+	if (rc == 0) {
+		/* Fresh data was just read from the device. */
+		ext4_bcache_set_flag(b->buf, BC_UPTODATE);
+		return 0;
+	}
+
+	ext4_bcache_free(bdev->bc, b);
+	b->lb_id = 0;
+	return rc;
+}
+
+/* Give a block reference back to the cache. Returns -1 (Eio) if the
+ * device is not open, otherwise the result of ext4_bcache_free(). */
+int ext4_block_set(struct ext4_blockdev *bdev, struct ext4_block *b)
+{
+	assert(bdev && b);
+	assert(b->buf);
+	if (bdev->bdif->ph_refctr != 0)
+		return ext4_bcache_free(bdev->bc, b);
+
+	werrstr(Eio);
+	return -1;
+}
+
+/* Read cnt logical blocks starting at lba directly through the driver
+ * (the block cache is not consulted) into buf; returns the bread status. */
+int ext4_blocks_get_direct(struct ext4_blockdev *bdev, void *buf, u64int lba,
+			   u32int cnt)
+{
+	u32int ph_per_lg;	/* physical blocks per logical block */
+	u64int first_ph;	/* first physical block of the run */
+
+	assert(bdev && buf);
+	ph_per_lg = bdev->lg_bsize / bdev->bdif->ph_bsize;
+	first_ph = (lba * bdev->lg_bsize + bdev->part_offset) / bdev->bdif->ph_bsize;
+	return ext4_bdif_bread(bdev, buf, first_ph, ph_per_lg * cnt);
+}
+
+/* Write cnt logical blocks starting at lba directly through the driver
+ * (the block cache is not consulted) from buf; returns the bwrite status. */
+int ext4_blocks_set_direct(struct ext4_blockdev *bdev, const void *buf,
+			   u64int lba, u32int cnt)
+{
+	u32int ph_per_lg;	/* physical blocks per logical block */
+	u64int first_ph;	/* first physical block of the run */
+
+	assert(bdev && buf);
+	ph_per_lg = bdev->lg_bsize / bdev->bdif->ph_bsize;
+	first_ph = (lba * bdev->lg_bsize + bdev->part_offset) / bdev->bdif->ph_bsize;
+	return ext4_bdif_bwrite(bdev, buf, first_ph, ph_per_lg * cnt);
+}
+
+int ext4_block_writebytes(struct ext4_blockdev *bdev, u64int off,
+			  const void *buf, u32int len)
+{
+	u64int block_idx;	/* physical block currently being written */
+	u32int blen;	/* number of whole physical blocks in the middle part */
+	u32int unalg;	/* offset of off inside its physical block */
+	int r = 0;
+	/* Write len bytes from buf at partition-relative byte offset off, via the driver hooks only. */
+	const u8int *p = (void *)buf;
+
+	assert(bdev && buf);
+	/* Refuse when the device is not open ... */
+	if (!bdev->bdif->ph_refctr) {
+		werrstr(Eio);
+		return -1;
+	}
+	/* ... or when the range would run past the end of the partition. */
+	if (off + len > bdev->part_size) {
+		werrstr(Eoorop);
+		return -1;
+	}
+	/* part_offset turns the partition-relative offset into a device offset. */
+	block_idx = ((off + bdev->part_offset) / bdev->bdif->ph_bsize);
+
+	/* Head: partial first block; the mask presumably relies on ph_bsize being a power of two. */
+	unalg = (off & (bdev->bdif->ph_bsize - 1));
+	if (unalg) {
+		/* Bytes landing in this block: the smaller of len and the room up to the block end. */
+		u32int wlen = (bdev->bdif->ph_bsize - unalg) > len
+				    ? len
+				    : (bdev->bdif->ph_bsize - unalg);
+		/* Read-modify-write through the scratch block ph_bbuf. */
+		r = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, block_idx, 1);
+		if (r != 0)
+			return r;
+
+		memcpy(bdev->bdif->ph_bbuf + unalg, p, wlen);
+		r = ext4_bdif_bwrite(bdev, bdev->bdif->ph_bbuf, block_idx, 1);
+		if (r != 0)
+			return r;
+
+		p += wlen;
+		len -= wlen;
+		block_idx++;
+	}
+
+	/* Middle: whole physical blocks are written straight from the caller's buffer. */
+	blen = len / bdev->bdif->ph_bsize;
+	if (blen != 0) {
+		r = ext4_bdif_bwrite(bdev, p, block_idx, blen);
+		if (r != 0)
+			return r;
+
+		p += bdev->bdif->ph_bsize * blen;
+		len -= bdev->bdif->ph_bsize * blen;
+
+		block_idx += blen;
+	}
+
+	/* Tail: fewer than ph_bsize bytes remain; read-modify-write the last block. */
+	if (len) {
+		r = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, block_idx, 1);
+		if (r != 0)
+			return r;
+
+		memcpy(bdev->bdif->ph_bbuf, p, len);
+		r = ext4_bdif_bwrite(bdev, bdev->bdif->ph_bbuf, block_idx, 1);
+		if (r != 0)
+			return r;
+	}
+
+	return r;
+}
+
+int ext4_block_readbytes(struct ext4_blockdev *bdev, u64int off, void *buf,
+			 u32int len)
+{
+	u64int block_idx;	/* physical block currently being read */
+	u32int blen;	/* number of whole physical blocks in the middle part */
+	u32int unalg;	/* offset of off inside its physical block */
+	int r = 0;
+	/* Read len bytes into buf from partition-relative byte offset off, via the driver hooks only. */
+	u8int *p = (void *)buf;
+
+	assert(bdev && buf);
+	/* Refuse when the device is not open ... */
+	if (!bdev->bdif->ph_refctr) {
+		werrstr(Eio);
+		return -1;
+	}
+	/* ... or when the range would run past the end of the partition. */
+	if (off + len > bdev->part_size) {
+		werrstr(Eoorop);
+		return -1;
+	}
+	/* part_offset turns the partition-relative offset into a device offset. */
+	block_idx = ((off + bdev->part_offset) / bdev->bdif->ph_bsize);
+
+	/* Head: partial first block; the mask presumably relies on ph_bsize being a power of two. */
+	unalg = (off & (bdev->bdif->ph_bsize - 1));
+	if (unalg) {
+		/* Bytes taken from this block: the smaller of len and the room up to the block end. */
+		u32int rlen = (bdev->bdif->ph_bsize - unalg) > len
+				    ? len
+				    : (bdev->bdif->ph_bsize - unalg);
+		/* Fetch the whole block into the scratch block ph_bbuf, then copy the wanted part. */
+		r = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, block_idx, 1);
+		if (r != 0)
+			return r;
+
+		memcpy(p, bdev->bdif->ph_bbuf + unalg, rlen);
+
+		p += rlen;
+		len -= rlen;
+		block_idx++;
+	}
+
+	/* Middle: whole physical blocks are read straight into the caller's buffer. */
+	blen = len / bdev->bdif->ph_bsize;
+
+	if (blen != 0) {
+		r = ext4_bdif_bread(bdev, p, block_idx, blen);
+		if (r != 0)
+			return r;
+
+		p += bdev->bdif->ph_bsize * blen;
+		len -= bdev->bdif->ph_bsize * blen;
+
+		block_idx += blen;
+	}
+
+	/* Tail: fewer than ph_bsize bytes remain; read the last block via ph_bbuf. */
+	if (len) {
+		r = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, block_idx, 1);
+		if (r != 0)
+			return r;
+
+		memcpy(p, bdev->bdif->ph_bbuf, len);
+	}
+
+	return r;
+}
+
+/* Flush the head of the dirty list until the list is empty or a flush
+ * fails; returns 0 or the error of the flush that failed. */
+int ext4_block_cache_flush(struct ext4_blockdev *bdev)
+{
+	int rc = 0;
+
+	while (rc == 0 && !SLIST_EMPTY(&bdev->bc->dirty_list)) {
+		struct ext4_buf *head = SLIST_FIRST(&bdev->bc->dirty_list);
+		assert(head);
+		rc = ext4_block_flush_buf(bdev, head);
+	}
+	return rc;
+}
+
+/* Nestable switch for write-back caching: a non-zero on_off adds one level,
+ * zero removes one (never below zero). If the level is zero afterwards the
+ * dirty list is flushed and that result is returned; otherwise returns 0. */
+int ext4_block_cache_write_back(struct ext4_blockdev *bdev, u8int on_off)
+{
+	if (on_off)
+		bdev->cache_write_back++;
+	else if (bdev->cache_write_back)
+		bdev->cache_write_back--;
+
+	if (bdev->cache_write_back != 0)
+		return 0;
+	return ext4_block_cache_flush(bdev);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_crc32.c
@@ -1,0 +1,144 @@
+/* Based on FreeBSD. */
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_crc32.h"
+
+static const u32int crc32_tab[] = {
+	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,
+	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,
+	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,
+	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
+	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
+	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
+	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
+	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
+	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
+	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
+	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
+	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
+	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
+	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
+	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+/* */
+/* CRC LOOKUP TABLE */
+/* ================ */
+/* The following CRC lookup table was generated automagically */
+/* by the Rocksoft^tm Model CRC Algorithm Table Generation */
+/* Program V1.0 using the following model parameters: */
+/* */
+/* Width : 4 bytes. */
+/* Poly : 0x1EDC6F41L */
+/* Reverse : TRUE. */
+/* */
+/* For more information on the Rocksoft^tm Model CRC Algorithm, */
+/* see the document titled "A Painless Guide to CRC Error */
+/* Detection Algorithms" by Ross Williams */
+/* ([email protected].). This document is likely to be */
+/* in the FTP archive "ftp.adelaide.edu.au/pub/rocksoft". */
+/* */
+static const u32int crc32c_tab[256] = {
+    0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 0xC79A971FL,
+    0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, 0x8AD958CFL, 0x78B2DBCCL,
+    0x6BE22838L, 0x9989AB3BL, 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L,
+    0x5E133C24L, 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+    0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, 0x9A879FA0L,
+    0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, 0x5D1D08BFL, 0xAF768BBCL,
+    0xBC267848L, 0x4E4DFB4BL, 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L,
+    0x33ED7D2AL, 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+    0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, 0x6DFE410EL,
+    0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, 0x30E349B1L, 0xC288CAB2L,
+    0xD1D83946L, 0x23B3BA45L, 0xF779DEAEL, 0x05125DADL, 0x1642AE59L,
+    0xE4292D5AL, 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+    0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, 0x417B1DBCL,
+    0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, 0x86E18AA3L, 0x748A09A0L,
+    0x67DAFA54L, 0x95B17957L, 0xCBA24573L, 0x39C9C670L, 0x2A993584L,
+    0xD8F2B687L, 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+    0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, 0x96BF4DCCL,
+    0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, 0xDBFC821CL, 0x2997011FL,
+    0x3AC7F2EBL, 0xC8AC71E8L, 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L,
+    0x0F36E6F7L, 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+    0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, 0xEB1FCBADL,
+    0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, 0x2C855CB2L, 0xDEEEDFB1L,
+    0xCDBE2C45L, 0x3FD5AF46L, 0x7198540DL, 0x83F3D70EL, 0x90A324FAL,
+    0x62C8A7F9L, 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+    0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, 0x3CDB9BDDL,
+    0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, 0x82F63B78L, 0x709DB87BL,
+    0x63CD4B8FL, 0x91A6C88CL, 0x456CAC67L, 0xB7072F64L, 0xA457DC90L,
+    0x563C5F93L, 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+    0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, 0x92A8FC17L,
+    0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, 0x55326B08L, 0xA759E80BL,
+    0xB4091BFFL, 0x466298FCL, 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL,
+    0x0B21572CL, 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+    0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, 0x65D122B9L,
+    0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, 0x2892ED69L, 0xDAF96E6AL,
+    0xC9A99D9EL, 0x3BC21E9DL, 0xEF087A76L, 0x1D63F975L, 0x0E330A81L,
+    0xFC588982L, 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+    0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, 0x38CC2A06L,
+    0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, 0xFF56BD19L, 0x0D3D3E1AL,
+    0x1E6DCDEEL, 0xEC064EEDL, 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L,
+    0xD0DDD530L, 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+    0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, 0x8ECEE914L,
+    0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, 0xD3D3E1ABL, 0x21B862A8L,
+    0x32E8915CL, 0xC083125FL, 0x144976B4L, 0xE622F5B7L, 0xF5720643L,
+    0x07198540L, 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+    0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, 0xE330A81AL,
+    0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, 0x24AA3F05L, 0xD6C1BC06L,
+    0xC5914FF2L, 0x37FACCF1L, 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L,
+    0x7AB90321L, 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+    0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, 0x34F4F86AL,
+    0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, 0x79B737BAL, 0x8BDCB4B9L,
+    0x988C474DL, 0x6AE7C44EL, 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L,
+    0xAD7D5351L};
+
+/* Table-driven CRC update: fold size bytes of buf into crc using tab. */
+static inline u32int crc32(u32int crc, const void *buf, u32int size,
+			     const u32int *tab)
+{
+	const u8int *bytes = buf;
+	u32int k;
+	for (k = 0; k < size; k++)
+		crc = (crc >> 8) ^ tab[(crc ^ bytes[k]) & 0xFF];
+	return crc;
+}
+
+u32int ext4_crc32(u32int crc, const void *buf, u32int size)
+{
+	return crc32(crc, buf, size, crc32_tab);	/* running update of crc over buf using crc32_tab */
+}
+
+u32int ext4_crc32c(u32int crc, const void *buf, u32int size)
+{
+	return crc32(crc, buf, size, crc32c_tab);	/* same update, using crc32c_tab (poly 0x1EDC6F41 per the table comment) */
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_debug.c
@@ -1,0 +1,21 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+
+static u32int debug_mask;
+
+void ext4_dmask_set(u32int m)
+{
+	debug_mask |= m;	/* turn on every bit present in m; other bits are kept */
+}
+
+void ext4_dmask_clr(u32int m)
+{
+	debug_mask &= ~m;	/* turn off every bit present in m; other bits are kept */
+}
+
+u32int ext4_dmask_get(void)
+{
+	return debug_mask;	/* current file-local debug mask */
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_dir.c
@@ -1,0 +1,649 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_trans.h"
+#include "ext4_dir.h"
+#include "ext4_dir_idx.h"
+#include "ext4_crc32.h"
+#include "ext4_inode.h"
+#include "ext4_fs.h"
+
+/* Locate the checksum pseudo-entry at the end of a dirent block, or nil. */
+static struct ext4_dir_entry_tail *
+ext4_dir_get_tail(struct ext4_inode_ref *inode_ref,
+		struct ext4_dir_en *de)
+{
+	u32int bsize = ext4_sb_get_block_size(&inode_ref->fs->sb);
+	struct ext4_dir_entry_tail *tail = EXT4_DIRENT_TAIL(de, bsize);
+
+	/* Both reserved fields must be zero in a genuine tail. */
+	if (tail->reserved_zero1 != 0 || tail->reserved_zero2 != 0)
+		return nil;
+	/* The record length must match the tail structure exactly. */
+	if (to_le16(tail->rec_len) != sizeof(struct ext4_dir_entry_tail))
+		return nil;
+	/* The file-type slot must carry the checksum marker. */
+	if (tail->reserved_ft != EXT4_DIRENTRY_DIR_CSUM)
+		return nil;
+	return tail;
+}
+
+static u32int ext4_dir_csum(struct ext4_inode_ref *inode_ref,
+			      struct ext4_dir_en *dirent, int size)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int le_index = to_le32(inode_ref->index);
+	u32int le_gen = to_le32(ext4_inode_get_generation(inode_ref->inode));
+	u32int sum;
+
+	/*
+	 * The CRC32C is chained over, in order: the filesystem UUID, the
+	 * inode number, the inode generation (both as passed through
+	 * to_le32) and finally the first size bytes starting at dirent.
+	 */
+	sum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	sum = ext4_crc32c(sum, &le_index, sizeof(le_index));
+	sum = ext4_crc32c(sum, &le_gen, sizeof(le_gen));
+	return ext4_crc32c(sum, dirent, size);
+}
+
+bool ext4_dir_csum_verify(struct ext4_inode_ref *inode_ref,
+			      struct ext4_dir_en *dirent)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	struct ext4_dir_entry_tail *tail;
+	u32int expect;
+	intptr covered;
+
+	/* Without the metadata_csum feature every block is accepted. */
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+
+	/* A block without a recognizable tail fails verification. */
+	tail = ext4_dir_get_tail(inode_ref, dirent);
+	if (tail == nil)
+		return false;
+
+	/* The checksum covers everything in front of the tail. */
+	covered = (char *)tail - (char *)dirent;
+	expect = ext4_dir_csum(inode_ref, dirent, covered);
+	return tail->checksum == to_le32(expect);
+}
+
+void ext4_dir_init_entry_tail(struct ext4_dir_entry_tail *t)
+{	/* Zero the tail, then stamp its own size and the checksum marker. */
+	memset(t, 0, sizeof(*t));
+	t->reserved_ft = EXT4_DIRENTRY_DIR_CSUM;
+	t->rec_len = to_le16(sizeof(*t));
+}
+
+void ext4_dir_set_csum(struct ext4_inode_ref *inode_ref,
+			   struct ext4_dir_en *dirent)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	struct ext4_dir_entry_tail *tail;
+	intptr covered;
+
+	/* Nothing to store unless the metadata_csum feature is enabled. */
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return;
+
+	/* Blocks without a recognizable tail are left untouched. */
+	tail = ext4_dir_get_tail(inode_ref, dirent);
+	if (tail == nil)
+		return;
+
+	covered = (char *)tail - (char *)dirent;
+	tail->checksum = to_le32(ext4_dir_csum(inode_ref, dirent, covered));
+}
+
+/**@brief Validate the entry at it->curr_off and publish it in it->curr.
+ * @param it Iterator whose current block is already loaded
+ * @param block_size Size of data block
+ * @return 0 on success, -1 (errstr "i/o error") if the entry is bad
+ */
+static int ext4_dir_iterator_set(struct ext4_dir_iter *it,
+				 u32int block_size)
+{
+	struct ext4_sblock *sb = &it->inode_ref->fs->sb;
+	u32int blk_off = it->curr_off % block_size;
+	struct ext4_dir_en *cand;
+	u16int rec_len;
+
+	/* Until every check has passed there is no current entry. */
+	it->curr = nil;
+
+	/* Entries start on 4-byte boundaries. */
+	if ((blk_off & 3) != 0)
+		goto Ioerr;
+
+	/* The fixed 8-byte part of the entry must lie inside the block. */
+	if (blk_off > block_size - 8)
+		goto Ioerr;
+
+	cand = (void *)(it->curr_blk.data + blk_off);
+	rec_len = ext4_dir_en_get_entry_len(cand);
+	/* The full record must end inside the block. */
+	if (blk_off + rec_len > block_size)
+		goto Ioerr;
+
+	/* The name must fit in what remains after the 8-byte part. */
+	if (ext4_dir_en_get_name_len(sb, cand) > rec_len - 8)
+		goto Ioerr;
+
+	it->curr = cand;
+	return 0;
+Ioerr:
+	werrstr("i/o error");
+	return -1;
+}
+
+/**@brief Move the iterator to byte position pos, loading the data block
+ *        that contains pos when it is not the one currently held.
+ * @param it  Initialized iterator
+ * @param pos Position of the next entry
+ * @return 0 on success (it->curr stays nil when pos is at/after the end)
+ */
+static int ext4_dir_iterator_seek(struct ext4_dir_iter *it, u64int pos)
+{
+	struct ext4_sblock *sb = &it->inode_ref->fs->sb;
+	struct ext4_inode *inode = it->inode_ref->inode;
+	struct ext4_blockdev *bdev = it->inode_ref->fs->bdev;
+	u64int size = ext4_inode_get_size(sb, inode);
+	int r;
+
+	/* The iterator is not valid until we seek to the desired position */
+	it->curr = nil;
+
+	/* At or past the inode size: release any held block, publish nothing */
+	if (pos >= size) {
+		if (it->curr_blk.lb_id) {
+			/* lb_id != 0 means a block is held: give it back */
+			r = ext4_block_set(bdev, &it->curr_blk);
+			it->curr_blk.lb_id = 0;
+			if (r != 0)
+				return r;
+		}
+
+		it->curr_off = pos;
+		return 0;
+	}
+
+	/* Logical block indices of the old offset and of the target pos */
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u64int current_blk_idx = it->curr_off / block_size;
+	u32int next_blk_idx = (u32int)(pos / block_size);
+
+	/*
+	 * If we don't have a block or are moving across block boundary,
+	 * we need to get another block
+	 */
+	if ((it->curr_blk.lb_id == 0) ||
+	    (current_blk_idx != next_blk_idx)) {
+		if (it->curr_blk.lb_id) {
+			r = ext4_block_set(bdev, &it->curr_blk);
+			it->curr_blk.lb_id = 0;
+
+			if (r != 0)
+				return r;
+		}
+
+		ext4_fsblk_t next_blk;
+		r = ext4_fs_get_inode_dblk_idx(it->inode_ref, next_blk_idx,
+					       &next_blk, false);
+		if (r != 0)
+			return r;
+
+		r = ext4_trans_block_get(bdev, &it->curr_blk, next_blk);
+		if (r != 0) {
+			it->curr_blk.lb_id = 0;
+			return r;
+		}
+	}
+
+	it->curr_off = pos;
+	return ext4_dir_iterator_set(it, block_size);
+}
+
+int ext4_dir_iterator_init(struct ext4_dir_iter *it,
+			   struct ext4_inode_ref *inode_ref, u64int pos)
+{
+	/* Start with no entry and no block held, then seek to pos. */
+	it->curr_blk.lb_id = 0;
+	it->curr_off = 0;
+	it->curr = nil;
+	it->inode_ref = inode_ref;
+	return ext4_dir_iterator_seek(it, pos);
+}
+
+int ext4_dir_iterator_next(struct ext4_dir_iter *it)
+{
+	u16int rec_len;
+	int rc;
+
+	/*
+	 * Step over the current record, and keep stepping while the
+	 * entry reached has inode number 0. Stop as soon as the seek
+	 * leaves no current entry (end of directory or an error).
+	 */
+	do {
+		rec_len = ext4_dir_en_get_entry_len(it->curr);
+		rc = ext4_dir_iterator_seek(it, it->curr_off + rec_len);
+		if (it->curr == nil)
+			break;
+	} while (ext4_dir_en_get_inode(it->curr) == 0 && rc == 0);
+	return rc;
+}
+
+int ext4_dir_iterator_fini(struct ext4_dir_iter *it)
+{
+	/* Drop the current entry; release the block only if one is held. */
+	it->curr = nil;
+	if (it->curr_blk.lb_id == 0)
+		return 0;
+
+	return ext4_block_set(it->inode_ref->fs->bdev, &it->curr_blk);
+}
+
+void ext4_dir_write_entry(struct ext4_sblock *sb, struct ext4_dir_en *en,
+			  u16int entry_len, struct ext4_inode_ref *child,
+			  const char *name, usize name_len)
+{
+	int de_type;
+
+	/* A record can never be longer than the block that holds it. */
+	assert(entry_len <= ext4_sb_get_block_size(sb));
+
+	/* Translate the child's inode mode into a dirent file type. */
+	switch (ext4_inode_type(sb, child->inode)) {
+	case EXT4_INODE_MODE_DIRECTORY:
+		de_type = EXT4_DE_DIR;
+		break;
+	case EXT4_INODE_MODE_FILE:
+		de_type = EXT4_DE_REG_FILE;
+		break;
+	case EXT4_INODE_MODE_SOFTLINK:
+		de_type = EXT4_DE_SYMLINK;
+		break;
+	case EXT4_INODE_MODE_CHARDEV:
+		de_type = EXT4_DE_CHRDEV;
+		break;
+	case EXT4_INODE_MODE_BLOCKDEV:
+		de_type = EXT4_DE_BLKDEV;
+		break;
+	case EXT4_INODE_MODE_FIFO:
+		de_type = EXT4_DE_FIFO;
+		break;
+	case EXT4_INODE_MODE_SOCKET:
+		de_type = EXT4_DE_SOCK;
+		break;
+	default:
+		/* FIXME: unsupported filetype */
+		de_type = EXT4_DE_UNKNOWN;
+	}
+	ext4_dir_en_set_inode_type(sb, en, de_type);
+	/* Fill in the fixed fields, then copy the name (no terminator). */
+	ext4_dir_en_set_inode(en, child->index);
+	ext4_dir_en_set_entry_len(en, entry_len);
+	ext4_dir_en_set_name_len(sb, en, (u16int)name_len);
+	memcpy(en->name, name, name_len);
+}
+
+int ext4_dir_add_entry(struct ext4_inode_ref *parent, const char *name,
+		       u32int name_len, struct ext4_inode_ref *child)
+{
+	int r;
+	struct ext4_fs *fs = parent->fs;
+	struct ext4_sblock *sb = &parent->fs->sb;
+	/* Adds an entry name -> child to the directory parent. */
+	/* Indexed insert first, when the fs feature and inode flag allow */
+	if ((ext4_sb_feature_com(sb, EXT4_FCOM_DIR_INDEX)) &&
+	    (ext4_inode_has_flag(parent->inode, EXT4_INODE_FLAG_INDEX))) {
+		r = ext4_dir_dx_add_entry(parent, child, name, name_len);
+		/* Any non-zero r falls through to the linear algorithm */
+		if (r == EXT4_ERR_BAD_DX_DIR) {
+			/* Needed to clear dir index flag if corrupted */
+			ext4_inode_clear_flag(parent->inode, EXT4_INODE_FLAG_INDEX);
+			parent->dirty = true;
+		} else if (r == 0) {
+			return 0;
+		}
+	}
+
+	/* Linear algorithm */
+	u32int iblock = 0;
+	ext4_fsblk_t fblock = 0;
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u64int inode_size = ext4_inode_get_size(sb, parent->inode);
+	u32int total_blocks = (u32int)(inode_size / block_size);
+
+	/* Find block, where is space for new entry and try to add */
+	bool success = false;
+	for (iblock = 0; iblock < total_blocks; ++iblock) {
+		r = ext4_fs_get_inode_dblk_idx(parent, iblock, &fblock, false);
+		if (r != 0)
+			return r;
+
+		struct ext4_block block;
+		r = ext4_trans_block_get(fs->bdev, &block, fblock);
+		if (r != 0)
+			return r;
+		/* A checksum mismatch is only reported; the block is still used */
+		if (!ext4_dir_csum_verify(parent, (void *)block.data)) {
+			ext4_dbg(DEBUG_DIR,
+				 DBG_WARN "Leaf block checksum failed."
+				 "Inode: %ud, "
+				 "Block: %ud\n",
+				 parent->index,
+				 iblock);
+		}
+
+		/* Try this block; it is released below whatever the outcome */
+		r = ext4_dir_try_insert_entry(sb, parent, &block, child,
+						name, name_len);
+		if (r == 0)
+			success = true;
+
+		r = ext4_block_set(fs->bdev, &block);
+		if (r != 0)
+			return r;
+
+		if (success)
+			return 0;
+	}
+
+	/* No free block found - needed to allocate next data block */
+
+	iblock = 0;
+	fblock = 0;
+	r = ext4_fs_append_inode_dblk(parent, &fblock, &iblock);
+	if (r != 0)
+		return r;
+
+	/* Get the new block without reading its old contents */
+	struct ext4_block b;
+
+	r = ext4_trans_block_get_noread(fs->bdev, &b, fblock);
+	if (r != 0)
+		return r;
+
+	/* Fill block with zeroes */
+	memset(b.data, 0, block_size);
+	struct ext4_dir_en *blk_en = (void *)b.data;
+
+	/* One entry spans the block, minus the tail when checksums are on */
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		u16int el = block_size - sizeof(struct ext4_dir_entry_tail);
+		ext4_dir_write_entry(sb, blk_en, el, child, name, name_len);
+		ext4_dir_init_entry_tail(EXT4_DIRENT_TAIL(b.data, block_size));
+	} else {
+		ext4_dir_write_entry(sb, blk_en, block_size, child, name,
+				name_len);
+	}
+
+	ext4_dir_set_csum(parent, (void *)b.data);
+	ext4_trans_set_block_dirty(b.buf);
+	r = ext4_block_set(fs->bdev, &b);
+
+	return r;
+}
+
+int ext4_dir_find_entry(struct ext4_dir_search_result *result,
+			struct ext4_inode_ref *parent, const char *name,
+			u32int name_len)
+{
+	int r;
+	struct ext4_sblock *sb = &parent->fs->sb;
+	/* Looks name up in parent; on success result holds block and entry. */
+	/* Start with an empty result: no block held, no entry */
+	result->block.lb_id = 0;
+	result->dentry = nil;
+
+	/* Index search */
+	if ((ext4_sb_feature_com(sb, EXT4_FCOM_DIR_INDEX)) &&
+	    (ext4_inode_has_flag(parent->inode, EXT4_INODE_FLAG_INDEX))) {
+		r = ext4_dir_dx_find_entry(result, parent, name_len, name);
+		/* Any non-zero r falls through to the linear scan below */
+		if (r == EXT4_ERR_BAD_DX_DIR) {
+			/* Needed to clear dir index flag if corrupted */
+			ext4_inode_clear_flag(parent->inode, EXT4_INODE_FLAG_INDEX);
+			parent->dirty = true;
+		} else if (r == 0) {
+			return 0;
+		}
+	}
+
+	/* Linear algorithm */
+
+	u32int iblock;
+	ext4_fsblk_t fblock;
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u64int inode_size = ext4_inode_get_size(sb, parent->inode);
+	u32int total_blocks = (u32int)(inode_size / block_size);
+
+	/* Walk through all data blocks */
+	for (iblock = 0; iblock < total_blocks; ++iblock) {
+		/* Load block address */
+		r = ext4_fs_get_inode_dblk_idx(parent, iblock, &fblock, false);
+		if (r != 0)
+			return r;
+
+		/* Load data block */
+		struct ext4_block b;
+		r = ext4_trans_block_get(parent->fs->bdev, &b, fblock);
+		if (r != 0){
+			werrstr("ext4_trans_block_get: %r");
+			return r;
+		}
+		/* A checksum mismatch is only reported; the block is still used */
+		if (!ext4_dir_csum_verify(parent, (void *)b.data)) {
+			ext4_dbg(DEBUG_DIR,
+				 DBG_WARN "Leaf block checksum failed."
+				 "Inode: %ud, "
+				 "Block: %ud\n",
+				 parent->index,
+				 iblock);
+		}
+
+		/* On a hit the block stays held in result (not released here) */
+		struct ext4_dir_en *res_entry;
+		r = ext4_dir_find_in_block(&b, sb, name_len, name, &res_entry);
+		if (r == 0) {
+			result->block = b;
+			result->dentry = res_entry;
+			return 0;
+		}
+
+		/* Any non-zero r: put the block and continue with the next one */
+
+		r = ext4_block_set(parent->fs->bdev, &b);
+		if (r != 0)
+			return r;
+	}
+
+	return EXT4_ERR_NOT_FOUND;
+}
+
+int ext4_dir_remove_entry(struct ext4_inode_ref *parent, const char *name,
+			  u32int name_len)
+{
+	struct ext4_sblock *sb = &parent->fs->sb;
+	struct ext4_dir_search_result found;
+	struct ext4_dir_en *prev;
+	u32int target_off, prev_off;
+	u16int prev_len, gone_len;
+	int rc;
+
+	/* Entries can only be removed from a directory inode. */
+	if (!ext4_inode_is_type(sb, parent->inode, EXT4_INODE_MODE_DIRECTORY)) {
+		werrstr("not a directory");
+		return -1;
+	}
+
+	/* Look the name up; on success found holds the block and entry. */
+	rc = ext4_dir_find_entry(&found, parent, name, name_len);
+	if (rc != 0)
+		return rc;
+
+	/* An inode number of 0 marks the entry as unused. */
+	ext4_dir_en_set_inode(found.dentry, 0);
+
+	/* Byte offset of the removed entry inside its block. */
+	target_off = (u8int *)found.dentry - found.block.data;
+
+	/*
+	 * Unless the entry is the first one in the block, its space is
+	 * handed to the record directly in front of it.
+	 */
+	if (target_off != 0) {
+		/* Walk records from the block start up to the predecessor. */
+		prev_off = 0;
+		prev = (void *)found.block.data;
+		prev_len = ext4_dir_en_get_entry_len(prev);
+		while (prev_off + prev_len < target_off) {
+			prev_off += ext4_dir_en_get_entry_len(prev);
+			prev = (void *)(found.block.data + prev_off);
+			prev_len = ext4_dir_en_get_entry_len(prev);
+		}
+		/* The predecessor must end exactly where the target starts. */
+		assert(prev_len + prev_off == target_off);
+
+		/* Grow the predecessor so that it covers the removed record. */
+		gone_len = ext4_dir_en_get_entry_len(found.dentry);
+		ext4_dir_en_set_entry_len(prev, prev_len + gone_len);
+	}
+
+	/* Refresh the block checksum, mark it dirty and release it. */
+	ext4_dir_set_csum(parent, (struct ext4_dir_en *)found.block.data);
+	ext4_trans_set_block_dirty(found.block.buf);
+	return ext4_dir_destroy_result(parent, &found);
+}
+
+/*
+ * Try to add an entry for child, named name, to directory block dst_blk
+ * by reusing an unused record or splitting the slack off a used one.
+ * Returns 0 on success, -1 with errstr "no space" or "corrupt entry".
+ */
+int ext4_dir_try_insert_entry(struct ext4_sblock *sb,
+			      struct ext4_inode_ref *inode_ref,
+			      struct ext4_block *dst_blk,
+			      struct ext4_inode_ref *child, const char *name,
+			      u32int name_len)
+{
+	/* Compute required length entry and align it to 4 bytes */
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u16int required_len = sizeof(struct ext4_fake_dir_entry) + name_len;
+
+	if ((required_len % 4) != 0)
+		required_len += 4 - (required_len % 4);
+
+	/* Initialize pointers, stop means to upper bound */
+	struct ext4_dir_en *start = (void *)dst_blk->data;
+	struct ext4_dir_en *stop = (void *)(dst_blk->data + block_size);
+
+	/* Walk the records looking for an unused one or one with slack */
+	while (start < stop) {
+		u32int inode = ext4_dir_en_get_inode(start);
+		u16int rec_len = ext4_dir_en_get_entry_len(start);
+		u8int itype = ext4_dir_en_get_inode_type(sb, start);
+
+		/* Zero length never advances; overlong would overrun writes */
+		if (rec_len == 0 || (u8int *)start + rec_len > (u8int *)stop) {
+			werrstr("corrupt entry");
+			return -1;
+		}
+
+		/* If invalid and large enough entry, use it */
+		if ((inode == 0) && (itype != EXT4_DIRENTRY_DIR_CSUM) &&
+		    (rec_len >= required_len)) {
+			ext4_dir_write_entry(sb, start, rec_len, child, name,
+					     name_len);
+			ext4_dir_set_csum(inode_ref, (void *)dst_blk->data);
+			ext4_trans_set_block_dirty(dst_blk->buf);
+			return 0;
+		}
+
+		/* Valid entry, try to split it */
+		if (inode != 0) {
+			u16int used_len = ext4_dir_en_get_name_len(sb, start);
+			u16int sz = sizeof(struct ext4_fake_dir_entry) + used_len;
+			if ((used_len % 4) != 0)
+				sz += 4 - (used_len % 4);
+			/* Guard rec_len < sz so the subtraction cannot wrap */
+			if (rec_len >= sz && rec_len - sz >= required_len) {
+				/* Cut tail of current entry */
+				u16int free_space = rec_len - sz;
+				struct ext4_dir_en *new_entry;
+				new_entry = (void *)((u8int *)start + sz);
+				ext4_dir_en_set_entry_len(start, sz);
+				ext4_dir_write_entry(sb, new_entry, free_space,
+						     child, name, name_len);
+				ext4_dir_set_csum(inode_ref,
+						  (void *)dst_blk->data);
+				ext4_trans_set_block_dirty(dst_blk->buf);
+				return 0;
+			}
+		}
+
+		/* Jump to the next entry */
+		start = (void *)((u8int *)start + rec_len);
+	}
+
+	/* No free space found for new entry */
+	werrstr("no space");
+	return -1;
+}
+
+/*
+ * Scan one directory block for an in-use entry whose name equals the
+ * name_len bytes at name. On a match *res_entry is set and 0 returned;
+ * otherwise EXT4_ERR_NOT_FOUND, or -1 when a zero-length record is met.
+ */
+int ext4_dir_find_in_block(struct ext4_block *block, struct ext4_sblock *sb,
+			   usize name_len, const char *name,
+			   struct ext4_dir_en **res_entry)
+{
+	/* Start at the first entry; addr_limit is one past the block */
+	struct ext4_dir_en *de = (struct ext4_dir_en *)block->data;
+	u8int *addr_limit = block->data + ext4_sb_get_block_size(sb);
+
+	while ((u8int *)de < addr_limit) {
+		/*
+		 * Stop once a name of name_len bytes no longer fits between
+		 * this entry's name field and the block end: no later entry
+		 * can match, and comparing would read past the buffer.
+		 */
+		if ((u8int *)de->name + name_len > addr_limit)
+			break;
+
+		/* Only in-use entries of the right name length are compared */
+		if (ext4_dir_en_get_inode(de) != 0 &&
+		    ext4_dir_en_get_name_len(sb, de) == name_len &&
+		    memcmp(name, de->name, name_len) == 0) {
+			*res_entry = de;
+			return 0;
+		}
+
+		u16int de_len = ext4_dir_en_get_entry_len(de);
+		if (de_len == 0) {
+			werrstr("corrupt entry");
+			return -1;
+		}
+
+		/* Jump to next entry */
+		de = (struct ext4_dir_en *)((u8int *)de + de_len);
+	}
+
+	/* Entry not found */
+	return EXT4_ERR_NOT_FOUND;
+}
+
+int ext4_dir_destroy_result(struct ext4_inode_ref *parent,
+			    struct ext4_dir_search_result *result)
+{
+	/* A search result with lb_id 0 holds no block to give back. */
+	if (result->block.lb_id == 0)
+		return 0;
+	return ext4_block_set(parent->fs->bdev, &result->block);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_dir_idx.c
@@ -1,0 +1,1356 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_trans.h"
+#include "ext4_dir_idx.h"
+#include "ext4_dir.h"
+#include "ext4_blockdev.h"
+#include "ext4_fs.h"
+#include "ext4_super.h"
+#include "ext4_inode.h"
+#include "ext4_crc32.h"
+#include "ext4_hash.h"
+
+/**@brief Get hash version used in directory index.
+ * @param ri Pointer to root info structure of index
+ * @return Hash algorithm version (the hash_version field, unconverted)
+ */
+static inline u8int
+ext4_dir_dx_rinfo_get_hash_version(struct ext4_dir_idx_rinfo *ri)
+{
+	return ri->hash_version;
+}
+
+/**@brief Set hash version, that will be used in directory index.
+ * @param ri Pointer to root info structure of index
+ * @param v Hash algorithm version
+ */
+static inline void
+ext4_dir_dx_rinfo_set_hash_version(struct ext4_dir_idx_rinfo *ri, u8int v)
+{
+	ri->hash_version = v;
+}
+
+/**@brief Get length of root_info structure in bytes.
+ * @param ri Pointer to root info structure of index
+ * @return Length of the structure (the info_length field)
+ */
+static inline u8int
+ext4_dir_dx_rinfo_get_info_length(struct ext4_dir_idx_rinfo *ri)
+{
+	return ri->info_length;
+}
+
+/**@brief Set length of root_info structure in bytes.
+ * @param ri  Pointer to root info structure of index
+ * @param len Length of the structure
+ */
+static inline void
+ext4_dir_dx_root_info_set_info_length(struct ext4_dir_idx_rinfo *ri,
+				      u8int len)
+{
+	ri->info_length = len;
+}
+
+/**@brief Get number of indirect levels of HTree.
+ * @param ri Pointer to root info structure of index
+ * @return Height of HTree (actually only 0 or 1)
+ */
+static inline u8int
+ext4_dir_dx_rinfo_get_indirect_levels(struct ext4_dir_idx_rinfo *ri)
+{
+	return ri->indirect_levels;
+}
+
+/**@brief Set number of indirect levels of HTree.
+ * @param ri Pointer to root info structure of index
+ * @param l Height of HTree (actually only 0 or 1)
+ */
+static inline void
+ext4_dir_dx_rinfo_set_indirect_levels(struct ext4_dir_idx_rinfo *ri, u8int l)
+{
+	ri->indirect_levels = l;
+}
+
+/**@brief Get maximum number of index node entries.
+ * @param climit Pointer to count/limit structure
+ * @return Maximum of entries in node (limit field passed through to_le16)
+ */
+static inline u16int
+ext4_dir_dx_climit_get_limit(struct ext4_dir_idx_climit *climit)
+{
+	return to_le16(climit->limit);
+}
+
+/**@brief Set maximum number of index node entries.
+ * @param climit Pointer to count/limit structure
+ * @param limit Maximum of entries in node (stored through to_le16)
+ */
+static inline void
+ext4_dir_dx_climit_set_limit(struct ext4_dir_idx_climit *climit, u16int limit)
+{
+	climit->limit = to_le16(limit);
+}
+
+/**@brief Get current number of index node entries.
+ * @param climit Pointer to count/limit structure
+ * @return Number of entries in node (count field passed through to_le16)
+ */
+static inline u16int
+ext4_dir_dx_climit_get_count(struct ext4_dir_idx_climit *climit)
+{
+	return to_le16(climit->count);
+}
+
+/**@brief Set current number of index node entries.
+ * @param climit Pointer to count/limit structure
+ * @param count Number of entries in node (stored through to_le16)
+ */
+static inline void
+ext4_dir_dx_climit_set_count(struct ext4_dir_idx_climit *climit, u16int count)
+{
+	climit->count = to_le16(count);
+}
+
+/**@brief Get hash value of index entry.
+ * @param entry Pointer to index entry
+ * @return Hash value (hash field passed through to_le32)
+ */
+static inline u32int
+ext4_dir_dx_entry_get_hash(struct ext4_dir_idx_entry *entry)
+{
+	return to_le32(entry->hash);
+}
+
+/**@brief Set hash value of index entry.
+ * @param entry Pointer to index entry
+ * @param hash  Hash value (stored through to_le32)
+ */
+static inline void
+ext4_dir_dx_entry_set_hash(struct ext4_dir_idx_entry *entry, u32int hash)
+{
+	entry->hash = to_le32(hash);
+}
+
+/**@brief Get block address where child node is located.
+ * @param entry Pointer to index entry
+ * @return Block address of child node (block field through to_le32)
+ */
+static inline u32int
+ext4_dir_dx_entry_get_block(struct ext4_dir_idx_entry *entry)
+{
+	return to_le32(entry->block);
+}
+
+/**@brief Set block address where child node is located.
+ * @param entry Pointer to index entry
+ * @param block Block address of child node (stored through to_le32)
+ */
+static inline void
+ext4_dir_dx_entry_set_block(struct ext4_dir_idx_entry *entry, u32int block)
+{
+	entry->block = to_le32(block);
+}
+
+/**@brief Sort entry item; no use of it is visible in this part of the file.*/
+struct ext4_dx_sort_entry {
+	u32int hash;	/* presumably the name hash used as sort key - confirm */
+	u32int rec_len;	/* presumably the record length of the entry - confirm */
+	void *dentry;	/* presumably points at the directory entry - confirm */
+};
+
+static int ext4_dir_dx_hash_string(struct ext4_hash_info *hinfo, int len,
+				   const char *name)
+{	/* hinfo's hash and minor_hash go in by address, presumably as outputs. */
+	return ext2_htree_hash(name, len, hinfo->seed, hinfo->hash_version,
+			       &hinfo->hash, &hinfo->minor_hash);
+}
+
+/*
+ * CRC32C for an HTree block: chained over the fs UUID, the inode number
+ * and generation, count_offset bytes plus count index entries starting
+ * at de, and the tail t with its checksum zeroed (0 without metadata_csum).
+ */
+static u32int ext4_dir_dx_checksum(struct ext4_inode_ref *inode_ref, void *de,
+				     int count_offset, int count,
+				     struct ext4_dir_idx_tail *t)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int ino_index, ino_gen, saved_csum, csum;
+	int sz;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return 0;
+
+	ino_index = to_le32(inode_ref->index);
+	ino_gen = to_le32(ext4_inode_get_generation(inode_ref->inode));
+	/* The node body is an array of index entries, not of tails. */
+	sz = count_offset + (count * sizeof(struct ext4_dir_idx_entry));
+	/* The stored checksum must not feed into its own computation. */
+	saved_csum = t->checksum;
+	t->checksum = 0;
+	csum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	csum = ext4_crc32c(csum, &ino_index, sizeof(ino_index));
+	csum = ext4_crc32c(csum, &ino_gen, sizeof(ino_gen));
+	csum = ext4_crc32c(csum, de, sz);
+	csum = ext4_crc32c(csum, t, sizeof(struct ext4_dir_idx_tail));
+	t->checksum = saved_csum;
+	return csum;
+}
+
+/*
+ * Find the count/limit header of an HTree block from its first record.
+ * A first record of length 12 is taken as the root layout (header at
+ * byte 32); otherwise the record must span the block (header at byte 8).
+ */
+static struct ext4_dir_idx_climit *
+ext4_dir_dx_get_climit(struct ext4_inode_ref *inode_ref,
+			   struct ext4_dir_en *dirent, int *offset)
+{
+	struct ext4_dir_idx_root *root = (struct ext4_dir_idx_root *)dirent;
+	u32int bsize = ext4_sb_get_block_size(&inode_ref->fs->sb);
+	u16int first_len = ext4_dir_en_get_entry_len(dirent);
+	struct ext4_dir_en *second;
+	int where = 8;
+
+	if (first_len != 12 && first_len != bsize)
+		return nil;
+	if (first_len == 12) {
+		/* Root layout: check the second dot record and the info. */
+		second = (struct ext4_dir_en *)&root->dots[1];
+		if (ext4_dir_en_get_entry_len(second) != (bsize - 12))
+			return nil;
+		if (root->info.reserved_zero != 0 ||
+		    root->info.info_length != sizeof(struct ext4_dir_idx_rinfo))
+			return nil;
+		where = 32;
+	}
+
+	if (offset != nil)
+		*offset = where;
+	return (struct ext4_dir_idx_climit *)(((char *)dirent) + where);
+}
+
+/*
+ * Compare the checksum stored in an HTree block's tail with a fresh one.
+ * The block is accepted (true) when metadata_csum is off, when no
+ * count/limit header is recognized, or when the limit leaves no room
+ * for a tail; only a mismatching stored checksum yields false.
+ */
+static bool ext4_dir_dx_csum_verify(struct ext4_inode_ref *inode_ref,
+				    struct ext4_dir_en *de)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int block_size = ext4_sb_get_block_size(sb);
+	struct ext4_dir_idx_climit *cl;
+	struct ext4_dir_idx_tail *tail;
+	int hdr_off, max_ents, used_ents;
+	u32int want;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+
+	cl = ext4_dir_dx_get_climit(inode_ref, de, &hdr_off);
+	if (cl == nil)
+		return true;
+
+	max_ents = ext4_dir_dx_climit_get_limit(cl);
+	used_ents = ext4_dir_dx_climit_get_count(cl);
+	/* The tail sits right after the last possible index entry. */
+	if (hdr_off + (max_ents * sizeof(struct ext4_dir_idx_entry)) >
+	    (block_size - sizeof(struct ext4_dir_idx_tail)))
+		return true;
+	tail = (void *)(((struct ext4_dir_idx_entry *)cl) + max_ents);
+
+	want = ext4_dir_dx_checksum(inode_ref, de, hdr_off, used_ents, tail);
+	want = to_le32(want);
+	return tail->checksum == want;
+}
+
+
+static void ext4_dir_set_dx_csum(struct ext4_inode_ref *inode_ref,
+				 struct ext4_dir_en *dirent)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int block_size = ext4_sb_get_block_size(sb);
+	struct ext4_dir_idx_climit *cl;
+	struct ext4_dir_idx_tail *tail;
+	int hdr_off, max_ents, used_ents;
+
+	/* Nothing is stored unless the metadata_csum feature is on. */
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return;
+
+	/* Without a recognizable count/limit header the block is left alone. */
+	cl = ext4_dir_dx_get_climit(inode_ref, dirent, &hdr_off);
+	if (cl == nil)
+		return;
+
+	max_ents = ext4_dir_dx_climit_get_limit(cl);
+	used_ents = ext4_dir_dx_climit_get_count(cl);
+	/* Skip the update when the limit leaves no room for a tail. */
+	if (hdr_off + (max_ents * sizeof(struct ext4_dir_idx_entry)) >
+	    (block_size - sizeof(struct ext4_dir_idx_tail)))
+		return;
+	tail = (void *)(((struct ext4_dir_idx_entry *)cl) + max_ents);
+	tail->checksum = to_le32(ext4_dir_dx_checksum(inode_ref, dirent,
+				hdr_off, used_ents, tail));
+}
+
+/****************************************************************************/
+
+/*
+ * Turn dir into an indexed (HTree) directory: one block becomes the index
+ * root with "." and ".." (".." referring to parent), a second block is set
+ * up as a single unused record. Returns 0 or the first non-zero rc met.
+ */
+int ext4_dir_dx_init(struct ext4_inode_ref *dir, struct ext4_inode_ref *parent)
+{
+	/* Load block 0, where will be index root located */
+	ext4_fsblk_t fblock;
+	u32int iblock = 0;
+	struct ext4_sblock *sb = &dir->fs->sb;
+	bool need_append =
+		ext4_inode_get_size(sb, dir->inode) < EXT4_DIR_DX_INIT_BCNT;
+	u32int block_size = ext4_sb_get_block_size(&dir->fs->sb);
+	struct ext4_block block;
+	int rc;
+
+	if (!need_append)
+		rc = ext4_fs_init_inode_dblk_idx(dir, iblock, &fblock);
+	else
+		rc = ext4_fs_append_inode_dblk(dir, &fblock, &iblock);
+	if (rc != 0)
+		return rc;
+
+	rc = ext4_trans_block_get_noread(dir->fs->bdev, &block, fblock);
+	if (rc != 0)
+		return rc;
+
+	/* Initialize pointers to data structures */
+	struct ext4_dir_idx_root *root = (void *)block.data;
+	struct ext4_dir_idx_rinfo *info = &(root->info);
+
+	memset(root, 0, sizeof(struct ext4_dir_idx_root));
+	struct ext4_dir_en *de;
+
+	/* Initialize dot entries */
+	de = (struct ext4_dir_en *)root->dots;
+	ext4_dir_write_entry(sb, de, 12, dir, ".", strlen("."));
+
+	de = (struct ext4_dir_en *)(root->dots + 1);
+	u16int elen = block_size - 12;
+	ext4_dir_write_entry(sb, de, elen, parent, "..", strlen(".."));
+
+	/* Initialize root info structure */
+	u8int hash_version = ext4_get8(&dir->fs->sb, default_hash_version);
+
+	ext4_dir_dx_rinfo_set_hash_version(info, hash_version);
+	ext4_dir_dx_rinfo_set_indirect_levels(info, 0);
+	ext4_dir_dx_root_info_set_info_length(info, 8);
+
+	/* Set limit and current number of entries */
+	struct ext4_dir_idx_climit *climit;
+	climit = (struct ext4_dir_idx_climit *)root->en;
+
+	ext4_dir_dx_climit_set_count(climit, 1);
+
+	u32int entry_space;
+	entry_space = block_size - 2 * sizeof(struct ext4_dir_idx_dot_en) -
+			sizeof(struct ext4_dir_idx_rinfo);
+
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		entry_space -= sizeof(struct ext4_dir_idx_tail);
+
+	u16int root_limit = entry_space / sizeof(struct ext4_dir_idx_entry);
+	ext4_dir_dx_climit_set_limit(climit, root_limit);
+
+	/* Append new block, where will be new entries inserted in the future */
+	iblock++;
+	if (!need_append)
+		rc = ext4_fs_init_inode_dblk_idx(dir, iblock, &fblock);
+	else
+		rc = ext4_fs_append_inode_dblk(dir, &fblock, &iblock);
+	if (rc != 0) {
+		ext4_block_set(dir->fs->bdev, &block);
+		return rc;
+	}
+
+	struct ext4_block new_block;
+	rc = ext4_trans_block_get_noread(dir->fs->bdev, &new_block, fblock);
+	if (rc != 0) {
+		ext4_block_set(dir->fs->bdev, &block);
+		return rc;
+	}
+
+	/* Fill the whole block with empty entry. The inode field is cleared */
+	/* first so the checksum below covers it; the buffer is not zeroed. */
+	struct ext4_dir_en *be = (void *)new_block.data;
+	ext4_dir_en_set_inode(be, 0);
+
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		u16int len = block_size - sizeof(struct ext4_dir_entry_tail);
+		ext4_dir_en_set_entry_len(be, len);
+		ext4_dir_en_set_name_len(sb, be, 0);
+		ext4_dir_en_set_inode_type(sb, be, EXT4_DE_UNKNOWN);
+		ext4_dir_init_entry_tail(EXT4_DIRENT_TAIL(be, block_size));
+		ext4_dir_set_csum(dir, be);
+	} else {
+		ext4_dir_en_set_entry_len(be, block_size);
+	}
+
+	ext4_trans_set_block_dirty(new_block.buf);
+	rc = ext4_block_set(dir->fs->bdev, &new_block);
+	if (rc != 0) {
+		ext4_block_set(dir->fs->bdev, &block);
+		return rc;
+	}
+
+	/* Connect new block to the only entry in index */
+	struct ext4_dir_idx_entry *entry = root->en;
+	ext4_dir_dx_entry_set_block(entry, iblock);
+
+	ext4_dir_set_dx_csum(dir, (struct ext4_dir_en *)block.data);
+	ext4_trans_set_block_dirty(block.buf);
+
+	return ext4_block_set(dir->fs->bdev, &block);
+}
+
+/**@brief Validate the index root and fill in hinfo for index operations.
+ * @param hinfo      Pointer to hinfo to be initialized
+ * @param root_block Root block (number 0) of index
+ * @param sb         Pointer to superblock
+ * @param name_len   Length of name to be computed hash value from
+ * @param name       Name to hash; when nil no hash is computed
+ * @return 0, EXT4_ERR_BAD_DX_DIR for a bad root, or the hash routine's code
+ */
+static int ext4_dir_hinfo_init(struct ext4_hash_info *hinfo,
+			       struct ext4_block *root_block,
+			       struct ext4_sblock *sb, usize name_len,
+			       const char *name)
+{
+	struct ext4_dir_idx_root *root = (void *)root_block->data;
+	struct ext4_dir_idx_rinfo *ri = &root->info;
+	u32int room;
+	u16int stored_limit;
+
+	/* Only the three known hash versions are accepted. */
+	if ((ri->hash_version != EXT2_HTREE_LEGACY) &&
+	    (ri->hash_version != EXT2_HTREE_HALF_MD4) &&
+	    (ri->hash_version != EXT2_HTREE_TEA))
+		return EXT4_ERR_BAD_DX_DIR;
+
+	/* No flag bits may be set and at most one indirect level is allowed. */
+	if (ri->unused_flags != 0 || ri->indirect_levels > 1)
+		return EXT4_ERR_BAD_DX_DIR;
+
+	/*
+	 * Recompute how many index entries fit in the root: the block
+	 * minus the two dot entries, the root info and, with
+	 * metadata_csum, the tail. The limit stored in the root must
+	 * agree with that number.
+	 */
+	room = ext4_sb_get_block_size(sb);
+	room -= 2 * sizeof(struct ext4_dir_idx_dot_en);
+	room -= sizeof(struct ext4_dir_idx_rinfo);
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		room -= sizeof(struct ext4_dir_idx_tail);
+	room = room / sizeof(struct ext4_dir_idx_entry);
+
+	stored_limit = ext4_dir_dx_climit_get_limit((void *)root->en);
+	if (stored_limit != room)
+		return EXT4_ERR_BAD_DX_DIR;
+
+	/* Versions up to TEA are bumped by 3 when the superblock flag */
+	/* EXT4_SUPERBLOCK_FLAGS_UNSIGNED_HASH is set. */
+	hinfo->hash_version = ext4_dir_dx_rinfo_get_hash_version(ri);
+	if ((hinfo->hash_version <= EXT2_HTREE_TEA) &&
+	    (ext4_sb_check_flag(sb, EXT4_SUPERBLOCK_FLAGS_UNSIGNED_HASH)))
+		hinfo->hash_version += 3;
+
+	/* Load hash seed from superblock */
+	hinfo->seed = ext4_get8(sb, hash_seed);
+
+	/* A nil name means the caller only wants hinfo set up. */
+	if (name == nil)
+		return 0;
+	return ext4_dir_dx_hash_string(hinfo, name_len, name);
+}
+
+/**@brief Walk through index tree and load leaf with corresponding hash value.
+ * @param hinfo      Initialized hash info structure
+ * @param inode_ref  Current i-node
+ * @param root_block Root block (iblock 0); still the root handle on failure
+ * @param dx_block   Output: pointer to the leaf node in the dx_blocks array
+ * @param dx_blocks  Array receiving the whole path from root to leaf
+ * @return Standard error code
+ */
+static int ext4_dir_dx_get_leaf(struct ext4_hash_info *hinfo,
+				struct ext4_inode_ref *inode_ref,
+				struct ext4_block *root_block,
+				struct ext4_dir_idx_block **dx_block,
+				struct ext4_dir_idx_block *dx_blocks)
+{
+	struct ext4_dir_idx_root *root;
+	struct ext4_dir_idx_entry *entries;
+	struct ext4_dir_idx_entry *p, *q, *m, *at;
+	ext4_fsblk_t fblk;
+	u32int block_size;
+	u16int limit, entry_space;
+	u8int ind_level;
+	int r;
+
+	struct ext4_dir_idx_block *tmp_dx_blk = dx_blocks;
+	struct ext4_block *tmp_blk = root_block;
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+
+	block_size = ext4_sb_get_block_size(sb);
+	root = (struct ext4_dir_idx_root *)root_block->data;
+	entries = (struct ext4_dir_idx_entry *)root->en;
+	limit = ext4_dir_dx_climit_get_limit((void *)entries);
+	ind_level = ext4_dir_dx_rinfo_get_indirect_levels(&root->info);
+
+	/* Walk through the index tree */
+	while (true) {
+		u16int cnt = ext4_dir_dx_climit_get_count((void *)entries);
+		if ((cnt == 0) || (cnt > limit)) {
+			if (tmp_dx_blk == dx_blocks)
+				return EXT4_ERR_BAD_DX_DIR;
+			goto bad_child;
+		}
+
+		/* Do binary search in every node */
+		p = entries + 1;
+		q = entries + cnt - 1;
+
+		while (p <= q) {
+			m = p + (q - p) / 2;
+			if (ext4_dir_dx_entry_get_hash(m) > hinfo->hash)
+				q = m - 1;
+			else
+				p = m + 1;
+		}
+
+		at = p - 1;
+
+		/* Write results */
+		memcpy(&tmp_dx_blk->b, tmp_blk, sizeof(struct ext4_block));
+		tmp_dx_blk->entries = entries;
+		tmp_dx_blk->position = at;
+
+		/* Is algorithm in the leaf? */
+		if (ind_level == 0) {
+			*dx_block = tmp_dx_blk;
+			return 0;
+		}
+
+		/* Goto child node */
+		u32int n_blk = ext4_dir_dx_entry_get_block(at);
+		ind_level--;
+		r = ext4_fs_get_inode_dblk_idx(inode_ref, n_blk, &fblk, false);
+		if (r != 0)
+			return r;
+
+		/* tmp_blk aliases the caller's root_block: if the load fails, put
+		 * the saved root handle (dx_blocks[0].b) back for the caller */
+		r = ext4_trans_block_get(inode_ref->fs->bdev, tmp_blk, fblk);
+		if (r != 0) {
+			memcpy(tmp_blk, &dx_blocks->b, sizeof(struct ext4_block));
+			return r;
+		}
+
+		entries = ((struct ext4_dir_idx_node *)tmp_blk->data)->entries;
+		limit = ext4_dir_dx_climit_get_limit((void *)entries);
+		entry_space = block_size - sizeof(struct ext4_fake_dir_entry);
+		if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+			entry_space -= sizeof(struct ext4_dir_idx_tail);
+		entry_space = entry_space / sizeof(struct ext4_dir_idx_entry);
+
+		if (limit != entry_space)
+			goto bad_child;
+
+		if (!ext4_dir_dx_csum_verify(inode_ref, (void *)tmp_blk->data)) {
+			ext4_dbg(DEBUG_DIR_IDX,
+					DBG_WARN "HTree checksum failed."
+					"Inode: %ud, "
+					"Block: %ud\n",
+					inode_ref->index, n_blk);
+		}
+
+		++tmp_dx_blk;
+	}
+
+bad_child:
+	/* Drop the child node and hand the root handle back to the caller */
+	ext4_block_set(inode_ref->fs->bdev, tmp_blk);
+	memcpy(tmp_blk, &dx_blocks->b, sizeof(struct ext4_block));
+	return EXT4_ERR_BAD_DX_DIR;
+}
+
+/**@brief Move the index path on to the next leaf block, if it may hold hash.
+ * @param inode_ref Directory i-node
+ * @param hash      Hash value to check
+ * @param dx_block  Current block
+ * @param dx_blocks Array with path from root to leaf node
+ * @return 0 when there is no next block to search, EXT4_ERR_NOT_FOUND when
+ *         the path now points at the next block, another code on failure
+ */
+static int ext4_dir_dx_next_block(struct ext4_inode_ref *inode_ref,
+				  u32int hash,
+				  struct ext4_dir_idx_block *dx_block,
+				  struct ext4_dir_idx_block *dx_blocks)
+{
+	int r;
+	u32int num_handles = 0;
+	ext4_fsblk_t blk_adr;
+	struct ext4_dir_idx_block *p = dx_block;
+
+	/* Try to find data block with next bunch of entries */
+	while (true) {
+		u16int cnt = ext4_dir_dx_climit_get_count((void *)p->entries);
+		p->position++;
+		if (p->position < p->entries + cnt)
+			break;
+
+		if (p == dx_blocks)
+			return 0;
+
+		num_handles++;
+		p--;
+	}
+
+	/* Check hash collision (if none occurred, the next block cannot
+	 * hold the entry and must not be used) */
+	u32int current_hash = ext4_dir_dx_entry_get_hash(p->position);
+	if ((hash & 1) == 0) {
+		if ((current_hash & ~1) != hash)
+			return 0;
+	}
+
+	/* Fill new path */
+	while (num_handles--) {
+		u32int blk = ext4_dir_dx_entry_get_block(p->position);
+		r = ext4_fs_get_inode_dblk_idx(inode_ref, blk, &blk_adr, false);
+		if (r != 0)
+			return r;
+
+		struct ext4_block b;
+		r = ext4_trans_block_get(inode_ref->fs->bdev, &b, blk_adr);
+		if (r != 0)
+			return r;
+
+		if (!ext4_dir_dx_csum_verify(inode_ref, (void *)b.data)) {
+			ext4_dbg(DEBUG_DIR_IDX,
+					DBG_WARN "HTree checksum failed."
+					"Inode: %ud, "
+					"Block: %ud\n",
+					inode_ref->index,
+					blk);
+		}
+
+		p++;
+
+		/* Put the old block, then always install the new one so the
+		 * path only ever holds handles the caller still has to release */
+		r = ext4_block_set(inode_ref->fs->bdev, &p->b);
+		memcpy(&p->b, &b, sizeof(b));
+		p->entries = ((struct ext4_dir_idx_node *)b.data)->entries;
+		p->position = p->entries;
+		if (r != 0)
+			return r;
+	}
+
+	return EXT4_ERR_NOT_FOUND;
+}
+
+/**@brief Look up a name in an indexed (HTree) directory.
+ * @param result    Receives the block and entry when the name is found
+ * @param inode_ref Directory i-node
+ * @param name_len  Length of the name
+ * @param name      Name to look up
+ * @return 0 if found, EXT4_ERR_NOT_FOUND if absent, other codes on error
+ */
+int ext4_dir_dx_find_entry(struct ext4_dir_search_result *result,
+			   struct ext4_inode_ref *inode_ref, usize name_len,
+			   const char *name)
+{
+	/* Load direct block 0 (index root) */
+	ext4_fsblk_t root_block_addr;
+	int rc, rc2;
+	rc = ext4_fs_get_inode_dblk_idx(inode_ref,  0, &root_block_addr, false);
+	if (rc != 0)
+		return rc;
+
+	struct ext4_fs *fs = inode_ref->fs;
+	struct ext4_block root_block;
+	rc = ext4_trans_block_get(fs->bdev, &root_block, root_block_addr);
+	if (rc != 0)
+		return rc;
+
+	if (!ext4_dir_dx_csum_verify(inode_ref, (void *)root_block.data)) {
+		ext4_dbg(DEBUG_DIR_IDX,
+			 DBG_WARN "HTree root checksum failed."
+			 "Inode: %ud, "
+			 "Block: %ud\n",
+			 inode_ref->index, (u32int)0);
+	}
+
+	/* Initialize hash info (compute hash value) */
+	struct ext4_hash_info hinfo;
+	rc = ext4_dir_hinfo_init(&hinfo, &root_block, &fs->sb, name_len, name);
+	if (rc != 0) {
+		ext4_block_set(fs->bdev, &root_block);
+		return EXT4_ERR_BAD_DX_DIR;
+	}
+
+	/*
+	 * Hardcoded number 2 means maximum height of index tree,
+	 * specified in the Linux driver. */
+	struct ext4_dir_idx_block dx_blocks[2];
+	struct ext4_dir_idx_block *dx_block;
+	struct ext4_dir_idx_block *tmp;
+
+	rc = ext4_dir_dx_get_leaf(&hinfo, inode_ref, &root_block, &dx_block,
+				  dx_blocks);
+	if (rc != 0) {
+		ext4_block_set(fs->bdev, &root_block);
+		return EXT4_ERR_BAD_DX_DIR;
+	}
+
+	for (;;) {
+		/* Load leaf block */
+		u32int leaf_blk_idx;
+		ext4_fsblk_t leaf_block_addr;
+		struct ext4_block b;
+
+		leaf_blk_idx = ext4_dir_dx_entry_get_block(dx_block->position);
+		rc = ext4_fs_get_inode_dblk_idx(inode_ref, leaf_blk_idx,
+						&leaf_block_addr, false);
+		if (rc != 0)
+			break;
+
+		rc = ext4_trans_block_get(fs->bdev, &b, leaf_block_addr);
+		if (rc != 0)
+			break;
+
+		if (!ext4_dir_csum_verify(inode_ref, (void *)b.data)) {
+			ext4_dbg(DEBUG_DIR_IDX,
+				 DBG_WARN "HTree leaf block checksum failed."
+				 "Inode: %ud, "
+				 "Block: %ud\n",
+				 inode_ref->index, leaf_blk_idx);
+		}
+
+		/* Linear search inside block */
+		struct ext4_dir_en *de;
+		rc = ext4_dir_find_in_block(&b, &fs->sb, name_len, name, &de);
+		/* Found => return it (the leaf block stays held in result) */
+		if (rc == 0) {
+			result->block = b;
+			result->dentry = de;
+			break;
+		}
+
+		/* Not found, leave untouched */
+		rc2 = ext4_block_set(fs->bdev, &b);
+		if (rc2 != 0)
+			break;
+
+		if (rc != EXT4_ERR_NOT_FOUND)
+			break;
+
+		/* Move to the next block; 0 means there is none left to check */
+		rc = ext4_dir_dx_next_block(inode_ref, hinfo.hash, dx_block, &dx_blocks[0]);
+		if (rc != EXT4_ERR_NOT_FOUND) {
+			if (rc == 0)
+				rc = EXT4_ERR_NOT_FOUND;
+			break;
+		}
+	}
+
+	/* The whole path must be released (preventing memory leak) */
+	tmp = dx_blocks;
+	while (tmp <= dx_block) {
+		rc2 = ext4_block_set(fs->bdev, &tmp->b);
+		if (rc == 0 && rc2 != 0)
+			rc = rc2;
+		++tmp;
+	}
+
+	return rc;
+}
+
+/**@brief  Ordering callback handed to qsort() when splitting a leaf.
+ *         Orders sort entries by ascending hash value.
+ * @param arg1  First entry (struct ext4_dx_sort_entry)
+ * @param arg2  Second entry (struct ext4_dx_sort_entry)
+ *
+ * @return Classic compare result
+ *         (-1: arg1 < arg2, 0: equal, 1: arg1 > arg2)
+ */
+static int ext4_dir_dx_entry_comparator(const void *arg1, const void *arg2)
+{
+	const struct ext4_dx_sort_entry *lhs = arg1;
+	const struct ext4_dx_sort_entry *rhs = arg2;
+
+	/* Compare explicitly rather than subtracting the two hashes */
+	if (lhs->hash < rhs->hash)
+		return -1;
+
+	if (lhs->hash > rhs->hash)
+		return 1;
+
+	return 0;
+}
+
+/**@brief  Insert a new index entry right after the current position.
+ *         Note that space for new entry must be checked by caller.
+ * @param inode_ref   Directory i-node
+ * @param index_block Index block (with position set) to insert into
+ * @param hash        Hash value covered by child node
+ * @param iblock      Logical number of child block
+ *
+ */
+static void
+ext4_dir_dx_insert_entry(struct ext4_inode_ref *inode_ref,
+			 struct ext4_dir_idx_block *index_block,
+			 u32int hash, u32int iblock)
+{
+	struct ext4_dir_idx_climit *hdr = (void *)index_block->entries;
+	struct ext4_dir_idx_entry *slot = index_block->position + 1;
+	u32int n = ext4_dir_dx_climit_get_count(hdr);
+	usize bytes;
+
+	/* Shift everything from the insertion slot onwards up by one entry */
+	bytes = (u8int *)(index_block->entries + n) - (u8int *)(slot);
+	memmove(slot + 1, slot, bytes);
+
+	/* Fill the freed slot and account for it in the entry count */
+	ext4_dir_dx_entry_set_block(slot, iblock);
+	ext4_dir_dx_entry_set_hash(slot, hash);
+	ext4_dir_dx_climit_set_count(hdr, n + 1);
+	/* Refresh the checksum and mark the block dirty */
+	ext4_dir_set_dx_csum(inode_ref, (void *)index_block->b.data);
+	ext4_trans_set_block_dirty(index_block->b.buf);
+}
+
+/**@brief Split directory entries to two parts preventing node overflow.
+ * @param inode_ref      Directory i-node
+ * @param hinfo          Hash info
+ * @param old_data_block Block with data to be split
+ * @param index_block    Block where index entries are located
+ * @param new_data_block Output value for newly allocated data block
+ * @return Standard error code
+ */
+static int ext4_dir_dx_split_data(struct ext4_inode_ref *inode_ref,
+				  struct ext4_hash_info *hinfo,
+				  struct ext4_block *old_data_block,
+				  struct ext4_dir_idx_block *index_block,
+				  struct ext4_block *new_data_block)
+{
+	int rc;
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
+
+	/* Allocate buffer for directory entries */
+	u8int *entry_buffer = ext4_malloc(block_size);
+	if (entry_buffer == nil) {
+		werrstr(Enomem);
+		return -1;
+	}
+	/* dot entry has the smallest size available */
+	u32int max_ecnt = block_size / sizeof(struct ext4_dir_idx_dot_en);
+
+	/* Allocate sort entry */
+	struct ext4_dx_sort_entry *sort;
+	sort = ext4_malloc(max_ecnt * sizeof(struct ext4_dx_sort_entry));
+	if (sort == nil) {
+		ext4_free(entry_buffer);
+		werrstr(Enomem);
+		return -1;
+	}
+
+	u32int idx = 0;
+
+	/* Initialize hinfo */
+	struct ext4_hash_info hinfo_tmp;
+	memcpy(&hinfo_tmp, hinfo, sizeof(struct ext4_hash_info));
+
+	/* Load all valid entries to the buffer */
+	struct ext4_dir_en *de = (void *)old_data_block->data;
+	u8int *entry_buffer_ptr = entry_buffer;
+	while ((void *)de < (void *)(old_data_block->data + block_size)) {
+		/* Read only valid entries */
+		if (ext4_dir_en_get_inode(de) && de->name_len) {
+			u16int len = ext4_dir_en_get_name_len(sb, de);
+			rc = ext4_dir_dx_hash_string(&hinfo_tmp, len,
+						     (char *)de->name);
+			if (rc != 0) {
+				ext4_free(sort);
+				ext4_free(entry_buffer);
+				return rc;
+			}
+
+			u32int rec_len = 8 + len;
+			if ((rec_len % 4) != 0)
+				rec_len += 4 - (rec_len % 4);
+
+			memcpy(entry_buffer_ptr, de, rec_len);
+
+			sort[idx].dentry = entry_buffer_ptr;
+			sort[idx].rec_len = rec_len;
+			sort[idx].hash = hinfo_tmp.hash;
+			entry_buffer_ptr += rec_len;
+			idx++;
+		}
+
+		usize elen = ext4_dir_en_get_entry_len(de);
+		de = (void *)((u8int *)de + elen);
+	}
+
+	qsort(sort, idx, sizeof(struct ext4_dx_sort_entry),
+	      ext4_dir_dx_entry_comparator);
+
+	/* Allocate new block for store the second part of entries */
+	ext4_fsblk_t new_fblock;
+	u32int new_iblock;
+	rc = ext4_fs_append_inode_dblk(inode_ref, &new_fblock, &new_iblock);
+	if (rc != 0) {
+		ext4_free(sort);
+		ext4_free(entry_buffer);
+		return rc;
+	}
+
+	/* Load new block */
+	struct ext4_block new_data_block_tmp;
+	rc = ext4_trans_block_get_noread(inode_ref->fs->bdev, &new_data_block_tmp,
+				   new_fblock);
+	if (rc != 0) {
+		ext4_free(sort);
+		ext4_free(entry_buffer);
+		return rc;
+	}
+
+	/* Distribute entries to two blocks (by size) - compute the half */
+	u32int new_hash = 0;
+	u32int current_size = 0;
+	u32int mid = 0;
+	u32int i;
+	for (i = 0; i < idx; ++i) {
+		if ((current_size + sort[i].rec_len) > (block_size / 2)) {
+			new_hash = sort[i].hash;
+			mid = i;
+			break;
+		}
+
+		current_size += sort[i].rec_len;
+	}
+
+	/*
+	 * No entry crossed the half-way mark: the live entries occupy at most
+	 * half of the block, so split them by count instead.
+	 */
+	if (i == idx && idx > 1) {
+		mid = idx / 2;
+		new_hash = sort[mid].hash;
+	}
+
+	/* Check hash collision (only when an entry precedes the split) */
+	u32int continued = 0;
+	if (mid > 0 && new_hash == sort[mid - 1].hash)
+		continued = 1;
+
+	u32int off = 0;
+	void *ptr;
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		block_size -= sizeof(struct ext4_dir_entry_tail);
+
+	/* First part - to the old block */
+	for (i = 0; i < mid; ++i) {
+		ptr = old_data_block->data + off;
+		memcpy(ptr, sort[i].dentry, sort[i].rec_len);
+
+		struct ext4_dir_en *t = ptr;
+		if (i < (mid - 1))
+			ext4_dir_en_set_entry_len(t, sort[i].rec_len);
+		else
+			ext4_dir_en_set_entry_len(t, block_size - off);
+
+		off += sort[i].rec_len;
+	}
+
+	/* Second part - to the new block */
+	off = 0;
+	for (i = mid; i < idx; ++i) {
+		ptr = new_data_block_tmp.data + off;
+		memcpy(ptr, sort[i].dentry, sort[i].rec_len);
+
+		struct ext4_dir_en *t = ptr;
+		if (i < (idx - 1))
+			ext4_dir_en_set_entry_len(t, sort[i].rec_len);
+		else
+			ext4_dir_en_set_entry_len(t, block_size - off);
+
+		off += sort[i].rec_len;
+	}
+
+	/* Undo the tail adjustment, then finish: tails, checksums, dirty flags */
+	block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		struct ext4_dir_entry_tail *t;
+
+		t = EXT4_DIRENT_TAIL(old_data_block->data, block_size);
+		ext4_dir_init_entry_tail(t);
+		t = EXT4_DIRENT_TAIL(new_data_block_tmp.data, block_size);
+		ext4_dir_init_entry_tail(t);
+	}
+	ext4_dir_set_csum(inode_ref, (void *)old_data_block->data);
+	ext4_dir_set_csum(inode_ref, (void *)new_data_block_tmp.data);
+	ext4_trans_set_block_dirty(old_data_block->buf);
+	ext4_trans_set_block_dirty(new_data_block_tmp.buf);
+
+	ext4_free(sort);
+	ext4_free(entry_buffer);
+
+	ext4_dir_dx_insert_entry(inode_ref, index_block, new_hash + continued,
+				new_iblock);
+
+	*new_data_block = new_data_block_tmp;
+	return 0;
+}
+
+/**@brief  Split index node and maybe some parent nodes in the tree hierarchy.
+ * @param ino_ref      Directory i-node
+ * @param dx_blks      Array with path from root to leaf node
+ * @param dxb          Leaf block to be split if needed
+ * @param new_dx_block Output: set to the new leaf position when the root is
+ *                     split and a second index level is added
+ * @return Error code
+ */
+static int
+ext4_dir_dx_split_index(struct ext4_inode_ref *ino_ref,
+			struct ext4_dir_idx_block *dx_blks,
+			struct ext4_dir_idx_block *dxb,
+			struct ext4_dir_idx_block **new_dx_block)
+{
+	struct ext4_sblock *sb = &ino_ref->fs->sb;
+	struct ext4_dir_idx_entry *e;
+	int r;
+
+	u32int block_size = ext4_sb_get_block_size(&ino_ref->fs->sb);
+	u32int entry_space = block_size - sizeof(struct ext4_fake_dir_entry);
+	bool meta_csum = ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM);
+
+	/* With metadata checksums the tail takes space away from the entries,
+	 * so it must be subtracted before the node limit is derived */
+	if (meta_csum)
+		entry_space -= sizeof(struct ext4_dir_idx_tail);
+	u32int node_limit = entry_space / sizeof(struct ext4_dir_idx_entry);
+
+	if (dxb == dx_blks)
+		e = ((struct ext4_dir_idx_root *)dxb->b.data)->en;
+	else
+		e = ((struct ext4_dir_idx_node *)dxb->b.data)->entries;
+
+	struct ext4_dir_idx_climit *climit = (struct ext4_dir_idx_climit *)e;
+
+	u16int leaf_limit = ext4_dir_dx_climit_get_limit(climit);
+	u16int leaf_count = ext4_dir_dx_climit_get_count(climit);
+
+	/* Check if is necessary to split index block */
+	if (leaf_limit == leaf_count) {
+		struct ext4_dir_idx_entry *ren;
+		intptr levels = dxb - dx_blks;
+
+		ren = ((struct ext4_dir_idx_root *)dx_blks[0].b.data)->en;
+		struct ext4_dir_idx_climit *rclimit = (void *)ren;
+		u16int root_limit = ext4_dir_dx_climit_get_limit(rclimit);
+		u16int root_count = ext4_dir_dx_climit_get_count(rclimit);
+
+		/* Linux limitation */
+		if ((levels > 0) && (root_limit == root_count)) {
+			werrstr(Enospc);
+			return -1;
+		}
+
+		/* Add new block to directory */
+		ext4_fsblk_t new_fblk;
+		u32int new_iblk;
+		r = ext4_fs_append_inode_dblk(ino_ref, &new_fblk, &new_iblk);
+		if (r != 0)
+			return r;
+
+		/* load new block */
+		struct ext4_block b;
+		r = ext4_trans_block_get_noread(ino_ref->fs->bdev, &b, new_fblk);
+		if (r != 0)
+			return r;
+
+		struct ext4_dir_idx_node *new_node = (void *)b.data;
+		struct ext4_dir_idx_entry *new_en = new_node->entries;
+
+		/* The fake directory entry is zeroed and spans the whole block */
+		memset(&new_node->fake, 0, sizeof(struct ext4_fake_dir_entry));
+		new_node->fake.entry_length = block_size;
+
+		/* Split leaf node */
+		if (levels > 0) {
+			u32int count_left = leaf_count / 2;
+			u32int count_right = leaf_count - count_left;
+			u32int hash_right;
+			usize sz;
+
+			struct ext4_dir_idx_climit *left_climit;
+			struct ext4_dir_idx_climit *right_climit;
+
+			hash_right = ext4_dir_dx_entry_get_hash(e + count_left);
+			/* Copy data to new node */
+			sz = count_right * sizeof(struct ext4_dir_idx_entry);
+			memcpy(new_en, e + count_left, sz);
+
+			/* Initialize new node */
+			left_climit = (struct ext4_dir_idx_climit *)e;
+			right_climit = (struct ext4_dir_idx_climit *)new_en;
+
+			ext4_dir_dx_climit_set_count(left_climit, count_left);
+			ext4_dir_dx_climit_set_count(right_climit, count_right);
+			ext4_dir_dx_climit_set_limit(right_climit, node_limit);
+
+			/* Which index block is target for new entry */
+			u32int position_index =
+			    (dxb->position - dxb->entries);
+			if (position_index >= count_left) {
+				/* The insertion point moved to the new node:
+				 * swap it into the path and release the old one */
+				ext4_dir_set_dx_csum(
+						ino_ref,
+						(struct ext4_dir_en *)
+						dxb->b.data);
+				ext4_trans_set_block_dirty(dxb->b.buf);
+
+				struct ext4_block block_tmp = dxb->b;
+
+				dxb->b = b;
+
+				dxb->position =
+				    new_en + position_index - count_left;
+				dxb->entries = new_en;
+
+				b = block_tmp;
+			}
+
+			/* Finally insert new entry */
+			ext4_dir_dx_insert_entry(ino_ref, dx_blks, hash_right,
+						 new_iblk);
+			ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[0].b.data);
+			ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[1].b.data);
+			ext4_trans_set_block_dirty(dx_blks[0].b.buf);
+			ext4_trans_set_block_dirty(dx_blks[1].b.buf);
+
+			ext4_dir_set_dx_csum(ino_ref, (void *)b.data);
+			ext4_trans_set_block_dirty(b.buf);
+			return ext4_block_set(ino_ref->fs->bdev, &b);
+		} else {
+			usize sz;
+			/* Copy data from root to child block */
+			sz = leaf_count * sizeof(struct ext4_dir_idx_entry);
+			memcpy(new_en, e, sz);
+
+			struct ext4_dir_idx_climit *new_climit = (void*)new_en;
+			ext4_dir_dx_climit_set_limit(new_climit, node_limit);
+
+			/* Set values in root node */
+			struct ext4_dir_idx_climit *new_root_climit = (void *)e;
+
+			ext4_dir_dx_climit_set_count(new_root_climit, 1);
+			ext4_dir_dx_entry_set_block(e, new_iblk);
+
+			struct ext4_dir_idx_root *root = (void *)dx_blks[0].b.data;
+			root->info.indirect_levels = 1;
+
+			/* Add new entry to the path */
+			dxb = dx_blks + 1;
+			dxb->position = dx_blks->position - e + new_en;
+			dxb->entries = new_en;
+			dxb->b = b;
+			*new_dx_block = dxb;
+
+			ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[0].b.data);
+			ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[1].b.data);
+			ext4_trans_set_block_dirty(dx_blks[0].b.buf);
+			ext4_trans_set_block_dirty(dx_blks[1].b.buf);
+		}
+	}
+
+	return 0;
+}
+
+/**@brief Add a new entry to an indexed (HTree) directory.
+ * @param parent   Directory i-node
+ * @param child    I-node handed on to the entry insertion for the new name
+ * @param name     Name of the new entry
+ * @param name_len Length of the name
+ * @return Standard error code
+ */
+int ext4_dir_dx_add_entry(struct ext4_inode_ref *parent,
+			  struct ext4_inode_ref *child, const char *name, u32int name_len)
+{
+	int rc2 = 0;
+	int r;
+	/* Get direct block 0 (index root) */
+	ext4_fsblk_t rblock_addr;
+	r =  ext4_fs_get_inode_dblk_idx(parent, 0, &rblock_addr, false);
+	if (r != 0)
+		return r;
+
+	struct ext4_fs *fs = parent->fs;
+	struct ext4_block root_blk;
+
+	r = ext4_trans_block_get(fs->bdev, &root_blk, rblock_addr);
+	if (r != 0)
+		return r;
+
+	if (!ext4_dir_dx_csum_verify(parent, (void*)root_blk.data)) {
+		ext4_dbg(DEBUG_DIR_IDX,
+			 DBG_WARN "HTree root checksum failed."
+			 "Inode: %ud, "
+			 "Block: %ud\n",
+			 parent->index, (u32int)0);
+	}
+
+	/* Initialize hinfo structure (mainly compute hash) */
+	struct ext4_hash_info hinfo;
+	r = ext4_dir_hinfo_init(&hinfo, &root_blk, &fs->sb, name_len, name);
+	if (r != 0) {
+		ext4_block_set(fs->bdev, &root_blk);
+		return EXT4_ERR_BAD_DX_DIR;
+	}
+
+	/*
+	 * Hardcoded number 2 means maximum height of index
+	 * tree defined in Linux.
+	 */
+	struct ext4_dir_idx_block dx_blks[2];
+	struct ext4_dir_idx_block *dx_blk;
+	struct ext4_dir_idx_block *dx_it;
+
+	r = ext4_dir_dx_get_leaf(&hinfo, parent, &root_blk, &dx_blk, dx_blks);
+	if (r != 0) {
+		/* dx_blk is not set on failure, so only the root is released */
+		ext4_block_set(fs->bdev, &root_blk);
+		return EXT4_ERR_BAD_DX_DIR;
+	}
+
+	/* Try to insert to existing data block */
+	u32int leaf_block_idx = ext4_dir_dx_entry_get_block(dx_blk->position);
+	ext4_fsblk_t leaf_block_addr;
+	r = ext4_fs_get_inode_dblk_idx(parent, leaf_block_idx,
+						&leaf_block_addr, false);
+	if (r != 0)
+		goto release_index;
+
+	/*
+	 * Check if there is needed to split index node
+	 * (and recursively also parent nodes)
+	 */
+	r = ext4_dir_dx_split_index(parent, dx_blks, dx_blk, &dx_blk);
+	if (r != 0)
+		goto release_index;
+
+	struct ext4_block target_block;
+	r = ext4_trans_block_get(fs->bdev, &target_block, leaf_block_addr);
+	if (r != 0)
+		goto release_index;
+
+	if (!ext4_dir_csum_verify(parent,(void *)target_block.data)) {
+		ext4_dbg(DEBUG_DIR_IDX,
+				DBG_WARN "HTree leaf block checksum failed."
+				"Inode: %ud, "
+				"Block: %ud\n",
+				parent->index, leaf_block_idx);
+	}
+
+	/* Check if insert operation passed */
+	r = ext4_dir_try_insert_entry(&fs->sb, parent, &target_block, child,
+					name, name_len);
+	if (r == 0)
+		goto release_target_index;
+
+	/* Split entries to two blocks (includes sorting by hash value) */
+	struct ext4_block new_block;
+	r = ext4_dir_dx_split_data(parent, &hinfo, &target_block, dx_blk,
+				    &new_block);
+	if (r != 0)
+		goto release_target_index;
+
+	/* Where to save new entry */
+	u32int blk_hash = ext4_dir_dx_entry_get_hash(dx_blk->position + 1);
+	if (hinfo.hash >= blk_hash)
+		r = ext4_dir_try_insert_entry(&fs->sb, parent, &new_block,
+						child, name, name_len);
+	else
+		r = ext4_dir_try_insert_entry(&fs->sb, parent, &target_block,
+						child, name, name_len);
+	/*
+	 * Release the new block whether or not the insert succeeded, then
+	 * fall through to the common cleanup.
+	 */
+	rc2 = ext4_block_set(fs->bdev, &new_block);
+	if (r == 0)
+		r = rc2;
+
+/* Cleanup operations */
+
+release_target_index:
+	rc2 = ext4_block_set(fs->bdev, &target_block);
+	if (r == 0)
+		r = rc2;
+
+release_index:
+	/* Keep the first error; later release failures must not hide it */
+	rc2 = r;
+	dx_it = dx_blks;
+	while (dx_it <= dx_blk) {
+		r = ext4_block_set(fs->bdev, &dx_it->b);
+		if (rc2 == 0)
+			rc2 = r;
+		dx_it++;
+	}
+
+	return rc2;
+}
+
+/**@brief Store a new parent i-node number in the index root (dots[1]).
+ * @param dir          Directory i-node whose index root is updated
+ * @param parent_inode New parent i-node number
+ * @return Standard error code */
+int ext4_dir_dx_reset_parent_inode(struct ext4_inode_ref *dir,
+                                   u32int parent_inode)
+{
+	struct ext4_dir_idx_root *idx_root;
+	struct ext4_block root_blk;
+	ext4_fsblk_t root_addr;
+	int err;
+
+	/* Load block 0, where the index root is located */
+	err = ext4_fs_get_inode_dblk_idx(dir, 0, &root_addr, false);
+	if (err == 0)
+		err = ext4_trans_block_get(dir->fs->bdev, &root_blk, root_addr);
+	if (err != 0)
+		return err;
+
+	/* A bad checksum is only reported; the update still goes ahead */
+	if (!ext4_dir_dx_csum_verify(dir, (void *)root_blk.data)) {
+		ext4_dbg(DEBUG_DIR_IDX,
+			 DBG_WARN "HTree root checksum failed."
+			 "Inode: %ud, "
+			 "Block: %ud\n",
+			 dir->index, (u32int)0);
+	}
+
+	idx_root = (void *)root_blk.data;
+	ext4_dx_dot_en_set_inode(&idx_root->dots[1], parent_inode);
+	ext4_dir_set_dx_csum(dir, (void *)root_blk.data);
+	ext4_trans_set_block_dirty(root_blk.buf);
+	return ext4_block_set(dir->fs->bdev, &root_blk);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_extent.c
@@ -1,0 +1,2218 @@
+#include "ext4_config.h"
+#include "ext4_debug.h"
+#include "ext4_fs.h"
+#include "ext4_trans.h"
+#include "ext4_blockdev.h"
+#include "ext4_extent.h"
+#include "ext4_inode.h"
+#include "ext4_super.h"
+#include "ext4_crc32.h"
+#include "ext4_balloc.h"
+
+//#define CONFIG_EXTENT_DEBUG_VERBOSE
+
+/**@brief Read the depth of the extent tree of an i-node.
+ * @param inode_ref I-node reference the tree belongs to
+ * @return Depth taken from the extent header obtained from the i-node */
+static inline u16int
+ext4_extent_tree_depth(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_extent_header *root_hdr =
+		ext4_inode_get_extent_header(inode_ref->inode);
+	return ext4_extent_header_get_depth(root_hdr);
+}
+
+static struct ext4_extent_tail *
+ext4_extent_get_csum_tail(struct ext4_extent_header *eh)
+{
+	char *base = (char *)eh; /* tail is EXT4_EXTENT_TAIL_OFFSET bytes in */
+	return (struct ext4_extent_tail *)(base + EXT4_EXTENT_TAIL_OFFSET(eh));
+}
+
+/**@brief Compute the crc32c checksum of an extent block.
+ * @param inode_ref I-node the extent block belongs to
+ * @param eh        Header at the start of the extent block
+ * @return Checksum, or 0 when metadata checksums are not enabled */
+static u32int ext4_extent_block_csum(struct ext4_inode_ref *inode_ref,
+				       struct ext4_extent_header *eh)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int ino_le, gen_le, crc;
+
+	/* Without the metadata_csum feature the result is always zero */
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return 0;
+
+	ino_le = to_le32(inode_ref->index);
+	gen_le = to_le32(ext4_inode_get_generation(inode_ref->inode));
+
+	/* Chain the crc over the fs uuid, then the i-node number and the
+	 * i-node generation */
+	crc = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	crc = ext4_crc32c(crc, &ino_le, sizeof(ino_le));
+	crc = ext4_crc32c(crc, &gen_le, sizeof(gen_le));
+
+	/* Finally cover the extent block up to the checksum field */
+	return ext4_crc32c(crc, eh, EXT4_EXTENT_TAIL_OFFSET(eh));
+}
+
+/**@brief Check the checksum stored in the tail of an extent block.
+ * @param inode_ref I-node the extent block belongs to
+ * @param block     Block holding the extent node
+ * @return true if the checksum matches or no check applies */
+static bool
+ext4_extent_verify_block_csum(struct ext4_inode_ref *inode_ref,
+			      struct ext4_block *block)
+{
+	struct ext4_extent_header *hdr = (void *)block->data;
+	u16int root_depth = ext4_extent_tree_depth(inode_ref);
+
+	/* Nothing to verify unless metadata checksums are enabled */
+	if (!ext4_sb_feature_ro_com(&inode_ref->fs->sb,
+				    EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+
+	/* Only nodes whose depth is below the root depth are checked */
+	if (ext4_extent_header_get_depth(hdr) >= root_depth)
+		return true;
+
+	return ext4_extent_get_csum_tail(hdr)->checksum ==
+	    to_le32(ext4_extent_block_csum(inode_ref, hdr));
+}
+
+/**@brief Store a freshly computed checksum in the tail of an extent block.
+ * @param inode_ref I-node the extent block belongs to
+ * @param eh        Header at the start of the extent block */
+static void
+ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref,
+			   struct ext4_extent_header *eh)
+{
+	u16int root_depth = ext4_extent_tree_depth(inode_ref);
+
+	/* Skip when checksums are off or the node is not below the root depth */
+	if (!ext4_sb_feature_ro_com(&inode_ref->fs->sb,
+				    EXT4_FRO_COM_METADATA_CSUM) ||
+	    ext4_extent_header_get_depth(eh) >= root_depth)
+		return;
+
+	ext4_extent_get_csum_tail(eh)->checksum =
+		to_le32(ext4_extent_block_csum(inode_ref, eh));
+}
+
+#ifdef CONFIG_EXTENT_DEBUG_VERBOSE
+/**@brief Dump every node of an extent path through ext4_dbg.
+ * @param inode_ref I-node the path belongs to
+ * @param path      Path to print, from path[rootdepth] down to path[0] */
+static void
+ext4_extent_print_path(struct ext4_inode_ref *inode_ref,
+		       struct ext4_extent_path *path)
+{
+	struct ext4_extent_path *node;
+	struct ext4_extent_index *ix;
+	struct ext4_extent *ex;
+	u16int k, count, max;
+
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "Path address: %p\n", path);
+
+	/* Walk from path[rootdepth] down to path[0] */
+	node = path + ext4_extent_tree_depth(inode_ref);
+	for (; node >= path; node--) {
+		count = ext4_extent_header_get_nentries(node->header);
+		max = ext4_extent_header_get_max_nentries(node->header);
+
+		ext4_dbg(DEBUG_EXTENT,
+DBG_INFO "-- Block: %llud, Depth: %uhd, Entries: %uhd, Limit: %uhd\n",
+			 node->block.lb_id, node->depth, count, max);
+
+		for (k = 0; k < count; k++) {
+			/* Nodes with a non-zero depth hold indexes */
+			if (node->depth) {
+				ix = EXT4_EXTENT_FIRST_INDEX(node->header) + k;
+				ext4_dbg(DEBUG_EXTENT,
+DBG_INFO "Index: iblock: %ud, fsblock: %llud\n",
+					 ext4_extent_index_get_iblock(ix),
+					 ext4_extent_index_get_fblock(ix));
+				continue;
+			}
+
+			/* Depth zero: the node holds extents */
+			ex = EXT4_EXTENT_FIRST(node->header) + k;
+			ext4_dbg(DEBUG_EXTENT,
+DBG_INFO "Extent: iblock: %ud, fsblock: %llud, count: %uhd\n",
+				 ext4_extent_get_iblock(ex),
+				 ext4_extent_get_fblock(ex),
+				 ext4_extent_get_nblocks(ex));
+		}
+	}
+
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "====================\n");
+}
+#else /* CONFIG_EXTENT_DEBUG_VERBOSE */
+#define ext4_extent_print_path(...)
+#endif /* CONFIG_EXTENT_DEBUG_VERBOSE */
+
+/**@brief Binary search in extent index node.
+ * @param header Extent header of index node
+ * @param index  Output value - set to the last index whose first logical
+ *               block is not greater than iblock (or the first index)
+ * @param iblock Logical block number to find in index node */
+static void ext4_extent_binsearch_idx(struct ext4_extent_header *header,
+				      struct ext4_extent_index **index,
+				      ext4_lblk_t iblock)
+{
+	struct ext4_extent_index *first = EXT4_EXTENT_FIRST_INDEX(header);
+	struct ext4_extent_index *lo, *hi, *mid;
+	u16int nentries = ext4_extent_header_get_nentries(header);
+
+	/* The first index is the fallback result, so the search itself
+	 * starts with the second one */
+	lo = first + 1;
+	hi = first + nentries - 1;
+
+	/* Narrow [lo, hi] until lo is the first index beyond iblock */
+	while (lo <= hi) {
+		mid = lo + (hi - lo) / 2;
+		ext4_lblk_t start = ext4_extent_index_get_iblock(mid);
+
+		if (iblock < start)
+			hi = mid - 1;
+		else
+			lo = mid + 1;
+	}
+
+	/* Set output value */
+	*index = lo - 1;
+}
+
+/**@brief Binary search in extent leaf node.
+ * @param header Extent header of leaf node
+ * @param extent Output value - set to the last extent whose first logical
+ *               block is not greater than iblock (the first extent if
+ *               there is none), or nil if node is empty
+ * @param iblock Logical block number to find in leaf node */
+static void ext4_extent_binsearch(struct ext4_extent_header *header,
+				  struct ext4_extent **extent,
+				  ext4_lblk_t iblock)
+{
+	struct ext4_extent *first = EXT4_EXTENT_FIRST(header);
+	struct ext4_extent *lo, *hi, *mid;
+	u16int nentries = ext4_extent_header_get_nentries(header);
+
+	if (nentries == 0) {
+		/* this leaf is empty */
+		*extent = nil;
+		return;
+	}
+
+	/* The first extent is the fallback result, so the search itself
+	 * starts with the second one */
+	lo = first + 1;
+	hi = first + nentries - 1;
+
+	/* Narrow [lo, hi] until lo is the first extent beyond iblock */
+	while (lo <= hi) {
+		mid = lo + (hi - lo) / 2;
+		ext4_lblk_t start = ext4_extent_get_iblock(mid);
+
+		if (iblock < start)
+			hi = mid - 1;
+		else
+			lo = mid + 1;
+	}
+
+	/* Set output value */
+	*extent = lo - 1;
+}
+
+/**@brief Flag the path node at depth (or the i-node, at root depth) dirty. */
+static void
+ext4_extent_path_dirty(struct ext4_inode_ref *inode_ref,
+		       struct ext4_extent_path *path,
+		       u16int depth)
+{
+	struct ext4_extent_path *node = path + depth;
+
+	if (depth == ext4_extent_tree_depth(inode_ref)) {
+		inode_ref->dirty = true;
+		return;
+	}
+
+	ext4_extent_block_csum_set(inode_ref, node->header);
+	ext4_trans_set_block_dirty(node->block.buf);
+}
+
+/**@brief Release every block held by an extent path (entries below root).
+ * @return 0, or the first error reported while releasing */
+static int
+ext4_extent_path_release(struct ext4_inode_ref *inode_ref,
+			 struct ext4_extent_path *path)
+{
+	int ret = 0, r;
+	u16int i, rootdepth;
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+	for (i = 0; i < rootdepth; i++) {
+		if (path[i].block.lb_id == 0)
+			continue;
+		/* Keep going on failure so the remaining blocks are not leaked */
+		r = ext4_block_set(inode_ref->fs->bdev, &path[i].block);
+		if (ret == 0)
+			ret = r;
+	}
+
+	return ret;
+}
+
+/**@brief Allocation hint used when the extent tree itself needs a block.
+ * The hint is the first block of the block group computed as
+ * inode index / inodes per group.
+ * NOTE(review): if inode_ref->index is 1-based, the last inode of a group
+ * maps to the following group here - confirm against the inode loader.
+ * @param inode_ref I-node
+ * @return Physical block allocation hint */
+static ext4_fsblk_t
+ext4_extent_tree_alloc_goal(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int group = inode_ref->index / ext4_get32(sb, inodes_per_group);
+
+	return ext4_fs_first_bg_block_no(sb, group);
+}
+
+/**@brief Allocation hint for data blocks.
+ * The hint is derived from the extent the path currently points at, by
+ * offsetting its physical start by the logical distance to @iblock so that
+ * the file stays contiguous. Without any extent in the leaf, the tree
+ * allocation hint plus @iblock is used.
+ * @param inode_ref I-node
+ * @param path      path in the extent tree
+ * @param iblock    the starting logical block of the
+ * mapping to be inserted
+ * @return Physical block allocation hint */
+static ext4_fsblk_t
+ext4_extent_data_alloc_goal(struct ext4_inode_ref *inode_ref,
+			    struct ext4_extent_path *path,
+			    ext4_lblk_t iblock)
+{
+	struct ext4_extent *ext = path[0].extent;
+	ext4_lblk_t extiblock, gap;
+	ext4_fsblk_t extfblock;
+
+	/* No mapping yet: fall back to the tree hint */
+	if (ext == nil)
+		return ext4_extent_tree_alloc_goal(inode_ref) + iblock;
+
+	extiblock = ext4_extent_get_iblock(ext);
+	extfblock = ext4_extent_get_fblock(ext);
+
+	/* Target lies after the extent start: project forward */
+	if (extiblock < iblock)
+		return extfblock + iblock - extiblock;
+
+	/* Target lies at or before the extent start: project backward,
+	 * but never below the extent's own physical start when the
+	 * subtraction would underflow */
+	gap = extiblock - iblock;
+	if (gap > extfblock)
+		return extfblock;
+
+	return extfblock - gap;
+}
+
+/**@brief Sanity-check an extent node that was read from a block.
+ * The checks, in order: header magic, header depth against the depth the
+ * caller expects, a non-zero entry count, the maximum entry count implied
+ * by the block size, and finally the block checksum.
+ * @param inode_ref I-node
+ * @param block     block buffer of the extent node block
+ * @param depth     the depth of extent node wanted
+ * @return true if the block passes verification, otherwise false
+ */
+static bool ext4_extent_block_verify(struct ext4_inode_ref *inode_ref,
+				     struct ext4_block *block,
+				     u16int depth)
+{
+	struct ext4_extent_header *hdr;
+	u32int bsize;
+	u16int wantmax;
+
+	hdr = (struct ext4_extent_header *)block->data;
+	bsize = ext4_sb_get_block_size(&inode_ref->fs->sb);
+
+	/* Number of entries that fit in the block after the header */
+	wantmax = (bsize - sizeof(struct ext4_extent_header)) /
+	    sizeof(struct ext4_extent);
+
+	/* Magic number */
+	if (ext4_extent_header_get_magic(hdr) != EXT4_EXTENT_MAGIC) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block header mismatch! Block number: %llud\n",
+			 block->lb_id);
+		return false;
+	}
+
+	/* Depth recorded in the node versus the depth the caller wants */
+	if (ext4_extent_header_get_depth(hdr) != depth) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block depth mismatch! Expected: %uhd, Got: %uhd. Block number: %llud\n",
+			 depth, ext4_extent_header_get_depth(hdr),
+			 block->lb_id);
+		return false;
+	}
+
+	/* A node stored in a block must hold at least one entry */
+	if (!ext4_extent_header_get_nentries(hdr)) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block does not contain any entries! Block number: %llud\n",
+			 block->lb_id);
+		return false;
+	}
+
+	/* Maximum entries field must agree with the block size */
+	if (ext4_extent_header_get_max_nentries(hdr) != wantmax) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Incorrect extent node block maximum entries field! Expected: %uhd, Got: %uhd. Block number: %llud\n",
+			 wantmax,
+			 ext4_extent_header_get_max_nentries(hdr),
+			 block->lb_id);
+		return false;
+	}
+
+	/* Checksum */
+	if (!ext4_extent_verify_block_csum(inode_ref, block)) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block checksum failed! Block number: %llud\n",
+			 block->lb_id);
+		return false;
+	}
+
+	return true;
+}
+
+/**@brief Find extent for specified iblock.
+ * Walks the extent tree from the root header stored in the inode down to
+ * a leaf, recording every visited node so the tree can be modified later.
+ * The path array is indexed by level: slot 0 is the leaf and the slot at
+ * the tree depth is the root. The array is allocated here with
+ * ext4_malloc() and is not freed on success.
+ * @param inode_ref I-node to read extent tree from
+ * @param iblock    Iblock to find extent for
+ * @param ppath     Output value - loaded path from extent tree; written
+ *                  only on success
+ * @return 0 on success, -1 with the error string set to Enomem or Eio,
+ * or the return value of ext4_trans_block_get() */
+static int ext4_extent_find_extent(struct ext4_inode_ref *inode_ref,
+				   ext4_lblk_t iblock,
+				   struct ext4_extent_path **ppath)
+{
+	struct ext4_extent_header *eh;
+	int ret;
+	u16int depth;
+	u16int k;
+	struct ext4_extent_path *tpath;
+
+	depth = ext4_extent_tree_depth(inode_ref);
+	eh = ext4_inode_get_extent_header(inode_ref->inode);
+
+	/* Added 2 for possible tree growing (1 extra depth) */
+	tpath = ext4_malloc(sizeof(struct ext4_extent_path) * (depth + 2));
+	if (tpath == nil) {
+		werrstr(Enomem);
+		return -1;
+	}
+
+	/* Zero the path array because we need to make sure that
+	 * lb_id field of block buffer is zero */
+	memset(tpath, 0, sizeof(struct ext4_extent_path) * (depth + 2));
+
+	/* Initialize structure for algorithm start */
+	k = depth;
+	tpath[k].block = inode_ref->block;
+	tpath[k].header = eh;
+
+	/* Walk through the extent tree */
+	while ((depth = ext4_extent_header_get_depth(eh)) != 0) {
+		/* Search index in index node by iblock */
+		ext4_extent_binsearch_idx(tpath[k].header,
+					  &tpath[k].index, iblock);
+
+		tpath[k].depth = depth;
+		tpath[k].extent = nil;
+
+		assert(tpath[k].index != 0);
+
+		/* Load information for the next iteration */
+		u64int fblock =
+		    ext4_extent_index_get_fblock(tpath[k].index);
+
+		struct ext4_block block;
+		ret = ext4_trans_block_get(inode_ref->fs->bdev, &block, fblock);
+		if (ret != 0)
+			goto errout0;
+
+		if (!ext4_extent_block_verify(inode_ref, &block, depth - 1)) {
+			/* The block is not recorded in tpath yet, so the
+			 * release at errout0 would not see it; put it back
+			 * here. Done before werrstr() so that Eio is the
+			 * error string the caller observes. */
+			ext4_block_set(inode_ref->fs->bdev, &block);
+			werrstr(Eio);
+			ret = -1;
+			goto errout0;
+		}
+
+		k--;
+
+		eh = (struct ext4_extent_header *)block.data;
+		tpath[k].block = block;
+		tpath[k].header = eh;
+	}
+
+	tpath[k].depth = 0;
+	tpath[k].extent = nil;
+	tpath[k].index = nil;
+
+	/* Find extent in the leaf node */
+	ext4_extent_binsearch(tpath[k].header, &tpath[k].extent,
+			      iblock);
+	*ppath = tpath;
+
+	return 0;
+
+errout0:
+	/* Put loaded blocks */
+	ext4_extent_path_release(inode_ref, tpath);
+
+	/* Destroy temporary data structure */
+	ext4_free(tpath);
+
+	return ret;
+}
+
+/**@brief Re-read the nodes of a path below a given level.
+ * Starting from the index stored at level @depth, each child node down to
+ * the leaf is released (if still loaded), fetched again through the index
+ * of its parent, verified, and has its cursor reset to the first or the
+ * last entry.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree
+ * @param depth     The level to start the reload at; must be non-zero
+ * @param right     Point the reloaded children at their last entry
+ *                  instead of their first one
+ * @return 0 on success, -1 with the error string set to Eio on corrupted
+ * block, or return values of ext4_block_set() / ext4_trans_block_get(). */
+int ext4_extent_reload_paths(struct ext4_inode_ref *inode_ref,
+			     struct ext4_extent_path *path,
+			     u16int depth,
+			     bool right)
+{
+	int ret = 0;
+	struct ext4_extent_header *header;
+	struct ext4_extent_path *p, *chldp;
+
+	/* The caller is expected to start from an index level, never
+	 * from the leaf */
+	assert(depth);
+
+	p = path + depth;
+
+	/* XXX: clamp a cursor that ran past the last index of its node */
+	if (p->index > EXT4_EXTENT_LAST_INDEX(p->header))
+		p->index = EXT4_EXTENT_LAST_INDEX(p->header);
+
+	/* Descend one level per iteration until the leaf is reloaded */
+	for (; p > path; --p) {
+		chldp = p - 1;
+
+		/* Drop the stale child buffer, if any */
+		if (chldp->block.lb_id) {
+			ret = ext4_block_set(inode_ref->fs->bdev, &chldp->block);
+			if (ret != 0)
+				return ret;
+		}
+
+		/* Fetch the block the parent's current index refers to */
+		ret = ext4_trans_block_get(inode_ref->fs->bdev, &chldp->block,
+					   ext4_extent_index_get_fblock(p->index));
+		if (ret != 0)
+			return ret;
+
+		header = (struct ext4_extent_header *)chldp->block.data;
+
+		/* Refuse to go on with a node that fails verification */
+		if (!ext4_extent_block_verify(inode_ref,
+					      &chldp->block, p->depth - 1)) {
+			werrstr(Eio);
+			return -1;
+		}
+
+		/* Rebuild the child's slot in the path */
+		chldp->header = header;
+		chldp->depth = ext4_extent_header_get_depth(header);
+		if (chldp->depth) {
+			chldp->index = right ?
+			    EXT4_EXTENT_LAST_INDEX(header) :
+			    EXT4_EXTENT_FIRST_INDEX(header);
+			chldp->extent = nil;
+		} else {
+			chldp->extent = right ?
+			    EXT4_EXTENT_LAST(header) :
+			    EXT4_EXTENT_FIRST(header);
+			chldp->index = nil;
+		}
+	}
+
+	return ret;
+}
+
+/**@brief Advance a path to the next extent.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree
+ * @param nonextp   Output value (may be nil) - true when the path already
+ * was on the right-most extent or the reload failed, false when the path
+ * now points at the next extent
+ * @return 0 on success, or the return value of
+ * ext4_extent_reload_paths(). */
+int ext4_extent_increment(struct ext4_inode_ref *inode_ref,
+			  struct ext4_extent_path *path,
+			  bool *nonextp)
+{
+	int ret = 0;
+	bool nonext = true;
+	u16int pos, level, rootdepth;
+	struct ext4_extent_path *node;
+
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+
+	/* Climb from the leaf until a node whose cursor is not on its
+	 * last entry */
+	for (level = 0, node = path; level <= rootdepth; level++, node++) {
+		if (node->depth)
+			pos = node->index -
+			    EXT4_EXTENT_FIRST_INDEX(node->header);
+		else
+			pos = node->extent -
+			    EXT4_EXTENT_FIRST(node->header);
+
+		if (pos < ext4_extent_header_get_nentries(node->header) - 1)
+			break;
+	}
+
+	/* Running off the root means there is nothing to the right */
+	if (level <= rootdepth) {
+		/* Step the cursor of the node we stopped at */
+		if (node->depth)
+			node->index++;
+		else
+			node->extent++;
+
+		/* Anything below that node now has to be re-read,
+		 * positioned on its left-most entries */
+		if (level != 0)
+			ret = ext4_extent_reload_paths(inode_ref, path,
+						       level, false);
+
+		if (ret == 0)
+			nonext = false;
+	}
+
+	if (nonextp != nil)
+		*nonextp = nonext;
+
+	return ret;
+}
+
+/**@brief Move a path back to the previous extent.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree
+ * @param noprevp   Output value (may be nil) - true when the path already
+ * was on the left-most extent or the reload failed, false when the path
+ * now points at the previous extent
+ * @return 0 on success, or the return value of
+ * ext4_extent_reload_paths(). */
+int
+ext4_extent_decrement(struct ext4_inode_ref *inode_ref,
+		      struct ext4_extent_path *path,
+		      bool *noprevp)
+{
+	int ret = 0;
+	bool noprev = true;
+	u16int pos, level, rootdepth;
+	struct ext4_extent_path *node;
+
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+
+	/* Climb from the leaf until a node whose cursor is not on its
+	 * first entry */
+	for (level = 0, node = path; level <= rootdepth; level++, node++) {
+		if (node->depth)
+			pos = node->index -
+			    EXT4_EXTENT_FIRST_INDEX(node->header);
+		else
+			pos = node->extent -
+			    EXT4_EXTENT_FIRST(node->header);
+
+		if (pos)
+			break;
+	}
+
+	/* Running off the root means there is nothing to the left */
+	if (level <= rootdepth) {
+		/* Step the cursor of the node we stopped at */
+		if (node->depth)
+			node->index--;
+		else
+			node->extent--;
+
+		/* Anything below that node now has to be re-read,
+		 * positioned on its right-most entries */
+		if (level != 0)
+			ret = ext4_extent_reload_paths(inode_ref, path,
+						       level, true);
+
+		if (ret == 0)
+			noprev = false;
+	}
+
+	if (noprevp != nil)
+		*noprevp = noprev;
+	return ret;
+}
+
+
+/**@brief Propagate a changed first key from a child node to its parents.
+ * Walking upward from the parent of the leaf, the starting logical block
+ * stored in each parent's current index is made equal to the starting
+ * logical block of the child's current entry. The walk stops as soon as
+ * a child's cursor is not on the first slot of its node, or when a key
+ * already matches and @force is false.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree
+ * @param force     set this to true if insertion, deletion or modification
+ * of starting logical block of the first index in a node is made at non-leaf
+ * level */
+static void ext4_extent_update_index(struct ext4_inode_ref *inode_ref,
+				     struct ext4_extent_path *path,
+				     bool force)
+{
+	u16int rootdepth;
+	struct ext4_extent_path *p, *chldp;
+	struct ext4_extent_index *idx;
+	ext4_lblk_t iblock, chldiblock;
+	intptr chldptr;
+
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+
+	for (p = path + 1; p <= path + rootdepth; p++) {
+		chldp = p - 1;
+
+		/* Offset of the child's cursor inside its own node */
+		if (chldp->depth)
+			chldptr = chldp->index -
+				    EXT4_EXTENT_FIRST_INDEX(chldp->header);
+		else
+			chldptr = chldp->extent -
+				    EXT4_EXTENT_FIRST(chldp->header);
+
+		/* Only a change in the first slot affects the parent key */
+		if (chldptr)
+			break;
+
+		idx = p->index;
+		iblock = ext4_extent_index_get_iblock(idx);
+
+		/* Above level 1 the child is read as an index node,
+		 * at level 1 the child is read as a leaf */
+		if (p->depth > 1)
+			chldiblock = ext4_extent_index_get_iblock(chldp->index);
+		else
+			chldiblock = ext4_extent_get_iblock(chldp->extent);
+
+		if (iblock != chldiblock) {
+			/* Bring the parent's key in line with the child */
+			ext4_extent_index_set_iblock(idx, chldiblock);
+			ext4_extent_path_dirty(inode_ref, path,
+					       p->depth);
+		} else if (!force) {
+			/* Keys agree and nobody insists: stop here */
+			break;
+		}
+	}
+}
+
+/**@brief Make the tree grow up by one level
+ * The current root node (read from inode_ref->inode->blocks) is copied
+ * into the block @newfblock, and the root header is rewritten as an index
+ * node with a single entry referring to that block. The path slot of the
+ * old root is redirected to the new block and the slot above it becomes
+ * the new root, so @path must have room for one extra level.
+ * @param inode_ref  I-node the extent tree resides in
+ * @param path       Path in the extent tree
+ * @param newfblock  The newly allocated block for tree growth
+ * @return 0 on success, or the return value of
+ * ext4_trans_block_get_noread() */
+static int ext4_extent_grow_tree(struct ext4_inode_ref *inode_ref,
+				 struct ext4_extent_path *path,
+				 ext4_fsblk_t newfblock)
+{
+	int rc;
+	u16int ptr;
+	struct ext4_block block;
+	ext4_lblk_t chldiblock;
+	u16int rootdepth;
+	struct ext4_block rootblock;
+	struct ext4_extent_header *rooteh;
+	struct ext4_extent_path *nrootp;
+	struct ext4_extent_path *rootp;
+	u32int blocksz;
+	u16int maxnentries;
+
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+	rootp = path + rootdepth;
+	nrootp = rootp + 1;
+	/* Keep the old root's block and header: they become the new root */
+	rootblock = rootp->block;
+	rooteh = rootp->header;
+	blocksz = ext4_sb_get_block_size(&inode_ref->fs->sb);
+
+	/* Store the extent/index offset so that we can recover the
+	 * pointer to it later */
+	if (rootdepth) {
+		ptr = rootp->index -
+		    EXT4_EXTENT_FIRST_INDEX(rootp->header);
+	} else {
+		ptr = rootp->extent -
+		    EXT4_EXTENT_FIRST(rootp->header);
+	}
+	/* Prepare a buffer for newly allocated block */
+	rc = ext4_trans_block_get_noread(inode_ref->fs->bdev, &block, newfblock);
+	if (rc != 0)
+		return rc;
+
+	/* Initialize newly allocated block */
+	memset(block.data, 0, blocksz);
+
+	/* Move data from root to the new block */
+	memcpy(block.data, inode_ref->inode->blocks,
+	       EXT4_INODE_BLOCKS * sizeof(u32int));
+
+	/* Update old root path */
+	rootp->block = block;
+	rootp->header = (struct ext4_extent_header *)block.data;
+	if (rootp->depth) {
+		/* Old root was an index node: restore the index cursor */
+		rootp->index =
+		    EXT4_EXTENT_FIRST_INDEX(rootp->header) +
+		    ptr;
+
+		maxnentries =
+		    (blocksz - sizeof(struct ext4_extent_header)) /
+		    sizeof(struct ext4_extent_index);
+		rootp->extent = nil;
+		chldiblock =
+		    ext4_extent_index_get_iblock(EXT4_EXTENT_FIRST_INDEX(rootp->header));
+	} else {
+		/* Old root was a leaf: restore the extent cursor */
+		rootp->extent =
+			EXT4_EXTENT_FIRST(rootp->header) +
+			ptr;
+		maxnentries =
+		    (blocksz - sizeof(struct ext4_extent_header)) /
+		    sizeof(struct ext4_extent);
+		rootp->index = nil;
+		chldiblock =
+			ext4_extent_get_iblock(EXT4_EXTENT_FIRST(rootp->header));
+	}
+
+	/* Re-initialize new root metadata */
+	nrootp->depth = rootdepth + 1;
+	nrootp->block = rootblock;
+	nrootp->header = rooteh;
+	nrootp->extent = nil;
+	nrootp->index = EXT4_EXTENT_FIRST_INDEX(nrootp->header);
+
+	ext4_extent_header_set_depth(nrootp->header, nrootp->depth);
+
+	/* Create new entry in root */
+	ext4_extent_header_set_nentries(nrootp->header, 1);
+	ext4_extent_index_set_iblock(nrootp->index, chldiblock);
+	ext4_extent_index_set_fblock(nrootp->index, newfblock);
+
+	/* Since new_root belongs to on-disk inode,
+	 * we don't do checksum here */
+	inode_ref->dirty = true;
+
+	/* Set upper limit for entries count of old root */
+	ext4_extent_header_set_max_nentries(rootp->header, maxnentries);
+
+	/* Mark the node that now holds the old root's contents as dirty */
+	ext4_extent_path_dirty(inode_ref, path, rootp->depth);
+
+	return 0;
+}
+
+/**@brief Do splitting on the tree if the leaf is full
+ * Counts, from the leaf upward, how many consecutive levels have no room
+ * left, allocates one new block per such level, grows the tree when every
+ * level up to the root is full, and then moves the upper half of each full
+ * node into a new right-hand block, inserting an index for it into the
+ * parent. The path is updated to follow the entry it pointed at.
+ * If a block allocation fails, the blocks obtained so far and the
+ * bookkeeping array are released before returning.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree for possible splitting
+ * @param nslots    number of entries that will be inserted to the
+ * leaf in future.
+ * @return 0 on success (including when no split is needed), -1 with the
+ * error string set to Enomem, or the return value of the failing block
+ * allocation / block buffer routine */
+static int ext4_extent_split(struct ext4_inode_ref *inode_ref,
+			     struct ext4_extent_path *path,
+			     u16int nslots)
+{
+	int ret;
+	u16int i;
+	ext4_fsblk_t goal;
+	u16int rootdepth;
+	struct ext4_extent_path *p;
+	u32int blocksz;
+	/* Number of new blocks to be allocated */
+	u16int nnewfblocks = 0;
+	/* Number of node to be split */
+	u16int nsplits = 0;
+	/* Array of new blocks allocated */
+	ext4_fsblk_t *newfblocks;
+	/* The index of the right block inserted last time */
+	ext4_lblk_t lastiblock = 0;
+	/* Whether we updated child path to point to the right block
+	 * at the previous round during splitting */
+	bool prevrblock = false;
+
+	blocksz = ext4_sb_get_block_size(&inode_ref->fs->sb);
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+	goal = ext4_extent_tree_alloc_goal(inode_ref);
+
+	/* First calculate how many levels will be touched */
+	for (p = path; p <= path + rootdepth; p++) {
+		u16int entries =
+		    ext4_extent_header_get_nentries(p->header);
+		u16int limit =
+		    ext4_extent_header_get_max_nentries(p->header);
+
+		assert(entries <= limit);
+		if (!p->depth) {
+			if (entries + nslots <= limit)
+				break;
+		} else {
+			if (entries < limit)
+				break;
+		}
+		/* We have to split a node when the tree is full */
+		nnewfblocks++;
+		nsplits++;
+	}
+
+	if (!nnewfblocks)
+		return 0;
+
+	/* Allocate the array for storing newly allocated blocks */
+	newfblocks = ext4_malloc(sizeof(ext4_fsblk_t) * nnewfblocks);
+	if (!newfblocks) {
+		werrstr(Enomem);
+		return -1;
+	}
+
+	for (i = 0; i < nnewfblocks; i++) {
+		ret = ext4_balloc_alloc_block(inode_ref, goal, newfblocks + i);
+		if (ret != 0) {
+			/* Nothing in the tree refers to the blocks obtained
+			 * so far; give them back together with the array
+			 * instead of leaking them */
+			while (i-- > 0)
+				ext4_balloc_free_block(inode_ref,
+						       newfblocks[i]);
+			ext4_free(newfblocks);
+			return ret;
+		}
+	}
+
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "nnewfblocks: %uhd rootdepth: %uhd\n",
+		 nnewfblocks, rootdepth);
+
+	/* If number of blocks to be allocated is greater than
+	 * the depth of root we have to grow the tree */
+	if (nnewfblocks == rootdepth + 1) {
+		ext4_dbg(DEBUG_EXTENT, "Growing: \n");
+		/* The root itself is not split: growing takes its block */
+		nsplits--;
+
+		ret = ext4_extent_grow_tree(inode_ref,
+					    path, newfblocks[rootdepth]);
+		if (ret != 0)
+			goto finish;
+
+		ext4_extent_print_path(inode_ref, path);
+
+		/* If we are moving the in-inode leaf to on-block leaf.
+		 * we do not need further actions. */
+		if (!rootdepth)
+			goto finish;
+
+		++rootdepth; USED(rootdepth);
+	}
+
+	/* Start splitting */
+	p = path;
+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "Start splitting: \n");
+	for (i = 0; i < nsplits; i++, p++) {
+		struct ext4_extent_header *header;
+		u16int entries =
+		    ext4_extent_header_get_nentries(p->header);
+		u16int limit =
+		    ext4_extent_header_get_max_nentries(p->header);
+		/* The entry we start shifting to the right block */
+		u16int split_ptr = entries / 2;
+		/* The number of entry the right block will have */
+		u16int right_entries = entries - split_ptr;
+		/* The current entry */
+		u16int curr_ptr;
+		ext4_lblk_t riblock;
+		struct ext4_block block;
+
+		ret = ext4_trans_block_get_noread(inode_ref->fs->bdev,
+						  &block, newfblocks[i]);
+		if (ret != 0)
+			goto finish;
+
+		/* Initialize newly allocated block and remember it */
+		memset(block.data, 0, blocksz);
+
+		header = (void *)block.data;
+
+		/* Initialize on-disk structure (header) */
+		ext4_extent_header_set_nentries(header,
+				right_entries);
+		ext4_extent_header_set_max_nentries(header, limit);
+		ext4_extent_header_set_magic(header, EXT4_EXTENT_MAGIC);
+		ext4_extent_header_set_depth(header, p->depth);
+		ext4_extent_header_set_generation(header, 0);
+
+		/* Move some entries from old block to new block */
+		if (p->depth) {
+			struct ext4_extent_index *left_index =
+				EXT4_EXTENT_FIRST_INDEX(p->header);
+			struct ext4_extent_index *split_index =
+				left_index + split_ptr;
+
+			riblock = ext4_extent_index_get_iblock(split_index);
+			ext4_dbg(DEBUG_EXTENT,
+				 DBG_INFO "depth: %ud, riblock: %ud\n",
+				 p->depth, riblock);
+
+			curr_ptr = p->index - left_index;
+
+			memcpy(EXT4_EXTENT_FIRST_INDEX(header),
+			       split_index,
+			       right_entries * EXT4_EXTENT_INDEX_SIZE);
+			memset(split_index, 0,
+			       right_entries * EXT4_EXTENT_INDEX_SIZE);
+		} else {
+			struct ext4_extent *left_extent =
+				EXT4_EXTENT_FIRST(p->header);
+			struct ext4_extent *split_extent =
+				left_extent + split_ptr;
+
+			riblock = ext4_extent_get_iblock(split_extent);
+			ext4_dbg(DEBUG_EXTENT,
+				 DBG_INFO "depth: %ud, riblock: %ud\n",
+				 p->depth, riblock);
+
+			curr_ptr = p->extent - left_extent;
+
+			memcpy(EXT4_EXTENT_FIRST(header),
+			       split_extent,
+			       right_entries * EXT4_EXTENT_SIZE);
+			memset(split_extent, 0,
+			       right_entries * EXT4_EXTENT_SIZE);
+		}
+
+		/* Set entries count in left node */
+		ext4_extent_header_set_nentries(p->header,
+						entries - right_entries);
+
+		/* Decide whether we need to update the path to
+		 * point to right block or not */
+		if (curr_ptr >= split_ptr) {
+			/* Update the checksum for the left block */
+			ext4_extent_path_dirty(inode_ref, path, p->depth);
+
+			/* Put back the left block */
+			ret = ext4_block_set(inode_ref->fs->bdev,
+					     &p->block);
+			if (ret != 0)
+				goto finish;
+
+			/* Update pointers in extent path structure to
+			 * point to right block */
+			p->block = block;
+			p->header = (void *)block.data;
+
+			if (p->depth) {
+				p->index =
+				    EXT4_EXTENT_FIRST_INDEX(p->header) +
+				    curr_ptr - split_ptr;
+			} else {
+				p->extent =
+				    EXT4_EXTENT_FIRST(p->header) +
+				    curr_ptr - split_ptr;
+			}
+		} else {
+			/* Update the checksum for the right block */
+			ext4_extent_block_csum_set(inode_ref, header);
+			ext4_trans_set_block_dirty(block.buf);
+
+			/* Put back the right block */
+			ret = ext4_block_set(inode_ref->fs->bdev,
+					     &block);
+			if (ret != 0)
+				goto finish;
+		}
+
+		/* Append an index after the current index.
+		 * This only runs for i >= 1 (the leaf at i == 0 has depth
+		 * zero), so newfblocks[i - 1] is the right block created
+		 * for the child in the previous round */
+		if (p->depth) {
+			struct ext4_extent_index *index = p->index + 1;
+
+			/* If we updated the path to right block in the previous
+			 * round, we update the pointer in the path to point to
+			 * the right block */
+			if (prevrblock)
+				p->index++;
+
+			if (index <= EXT4_EXTENT_LAST_INDEX(p->header)) {
+				u16int nindex =
+					EXT4_EXTENT_LAST_INDEX(p->header) -
+					index + 1;
+
+				memmove(index + 1,
+					index,
+					nindex * EXT4_EXTENT_INDEX_SIZE);
+			}
+			memset(index, 0, EXT4_EXTENT_INDEX_SIZE);
+			ext4_extent_index_set_iblock(index, lastiblock);
+			ext4_extent_index_set_fblock(index, newfblocks[i - 1]);
+
+			entries = ext4_extent_header_get_nentries(p->header);
+			ext4_extent_header_set_nentries(p->header,
+					entries + 1);
+		}
+
+		ext4_extent_path_dirty(inode_ref, path, p->depth);
+
+		/* We may have updated the path to right block in this round */
+		prevrblock = curr_ptr >= split_ptr;
+
+		/* We also update the lastiblock variable to the index of the
+		 * right block */
+		lastiblock = riblock;
+	}
+
+	/* Append an index after the current index: p is now the first
+	 * level that was not split and receives the index of the last
+	 * right block created below it */
+	if (p->depth) {
+		struct ext4_extent_index *index = p->index + 1;
+		u16int entries =
+		    ext4_extent_header_get_nentries(p->header);
+
+		/* If we updated the path to right block in the previous
+		 * round, we update the pointer in the path to point to
+		 * the right block */
+		if (prevrblock)
+			p->index++;
+
+		if (index <= EXT4_EXTENT_LAST_INDEX(p->header)) {
+			u16int nindex =
+				EXT4_EXTENT_LAST_INDEX(p->header) -
+				index + 1;
+
+			memmove(index + 1,
+				index,
+				nindex * EXT4_EXTENT_INDEX_SIZE);
+		}
+		memset(index, 0, EXT4_EXTENT_INDEX_SIZE);
+		ext4_extent_index_set_iblock(index, lastiblock);
+		ext4_extent_index_set_fblock(index, newfblocks[i - 1]);
+		ext4_extent_header_set_nentries(p->header,
+				entries + 1);
+
+		ext4_extent_path_dirty(inode_ref, path, p->depth);
+	}
+
+	ret = 0;
+finish:
+	/* On failure give back every block allocated for this split */
+	if (ret != 0)
+		for (i = 0; i < nnewfblocks; i++)
+			ext4_balloc_free_block(inode_ref, newfblocks[i]);
+
+	ext4_free(newfblocks);
+	return ret;
+}
+
+/**@brief Insert an extent into the leaf a path points at.
+ * Room is made first by splitting/growing the tree for one new slot. The
+ * new extent is placed after the current extent when its starting logical
+ * block is greater, otherwise at the current position; later entries are
+ * shifted right by one. Parent index keys are refreshed afterwards.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree for possible splitting
+ * @param ext       Extent to be inserted
+ * @return 0 on success, or the return value of ext4_extent_split() */
+static int ext4_extent_insert(struct ext4_inode_ref *inode_ref,
+			      struct ext4_extent_path *path,
+			      struct ext4_extent *ext)
+{
+	int ret;
+	u16int entries;
+	struct ext4_extent_path *leaf;
+	struct ext4_extent *last;
+
+	/* Split and grow the tree if necessary */
+	ret = ext4_extent_split(inode_ref, path, 1);
+	if (ret != 0)
+		return ret;
+
+	leaf = path;
+	entries = ext4_extent_header_get_nentries(leaf->header);
+
+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "After splitting: \n");
+	ext4_extent_print_path(inode_ref, path);
+
+	/* Pick the slot the new extent goes into */
+	if (leaf->extent == nil)
+		leaf->extent = EXT4_EXTENT_FIRST(leaf->header);
+	else if (ext4_extent_get_iblock(ext) >
+		 ext4_extent_get_iblock(leaf->extent))
+		leaf->extent++;
+
+	/* Open a gap when the slot is occupied */
+	last = EXT4_EXTENT_LAST(leaf->header);
+	if (leaf->extent <= last) {
+		u16int nextent = last - leaf->extent + 1;
+
+		ext4_dbg(DEBUG_EXTENT,
+			 DBG_INFO "%uhd extents to be shifted at leaf\n",
+			 nextent);
+
+		memmove(leaf->extent + 1, leaf->extent,
+			nextent * EXT4_EXTENT_SIZE);
+	}
+
+	/* Store the extent and account for it */
+	memcpy(leaf->extent, ext, EXT4_EXTENT_SIZE);
+	ext4_extent_header_set_nentries(leaf->header, entries + 1);
+
+	ext4_extent_path_dirty(inode_ref, path, leaf->depth);
+
+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "Before updating indice: \n");
+	ext4_extent_print_path(inode_ref, path);
+
+	/* Update the index of the first entry in parents node */
+	ext4_extent_update_index(inode_ref, path, false);
+
+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "At the end: \n");
+	ext4_extent_print_path(inode_ref, path);
+
+	return ret;
+}
+
+/**@brief Delete the item the path points at in the node at level @depth.
+ * The entries following the deleted one are shifted left by one slot, the
+ * vacated last slot is zeroed, the entry count in the header is decreased
+ * and the node is marked dirty. The node must contain at least one entry.
+ * This function does not return a value.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path      Path in the extent tree
+ * @param depth     The level of the node to be operated on */
+static void
+ext4_extent_delete_item(struct ext4_inode_ref *inode_ref,
+			struct ext4_extent_path *path,
+			u16int depth)
+{
+	u16int nitems;
+	struct ext4_extent_header *hdr;
+	struct ext4_extent_path *p;
+
+	p = path + depth;
+
+	hdr = p->header;
+	assert(ext4_extent_header_get_nentries(hdr));
+
+	if (p->depth) {
+		struct ext4_extent_index *idx;
+
+		idx = p->index;
+		/* Number of index entries located after the deleted one */
+		nitems = EXT4_EXTENT_LAST_INDEX(hdr) - (idx + 1) + 1;
+		if (nitems) {
+			memmove(idx, idx + 1,
+				nitems * EXT4_EXTENT_INDEX_SIZE);
+			/* Clear the slot that became free at the end; this
+			 * is an index node, so address it through the index
+			 * macro rather than the extent one */
+			memset(EXT4_EXTENT_LAST_INDEX(hdr), 0,
+			       EXT4_EXTENT_INDEX_SIZE);
+		} else {
+			memset(idx, 0, EXT4_EXTENT_INDEX_SIZE);
+		}
+	} else {
+		struct ext4_extent *ext;
+
+		ext = p->extent;
+		/* Number of extents located after the deleted one */
+		nitems = EXT4_EXTENT_LAST(hdr) - (ext + 1) + 1;
+		if (nitems) {
+			memmove(ext, ext + 1,
+				nitems * EXT4_EXTENT_SIZE);
+			/* Clear the slot that became free at the end */
+			memset(EXT4_EXTENT_LAST(hdr), 0,
+			       EXT4_EXTENT_SIZE);
+		} else {
+			memset(ext, 0, EXT4_EXTENT_SIZE);
+		}
+	}
+
+	nitems = ext4_extent_header_get_nentries(hdr) - 1;
+	ext4_extent_header_set_nentries(hdr,
+					nitems);
+	ext4_extent_path_dirty(inode_ref, path, p->depth);
+}
+
+/**@brief Remove mappings from a leaf, starting at the current extent,
+ * up to and including logical block @toiblock.
+ * Extents lying entirely at or below @toiblock are deleted and their
+ * blocks freed; an extent that reaches past @toiblock is trimmed at the
+ * front instead of being deleted.
+ * @param inode_ref I-node the tree resides in
+ * @param path      Path in the extent tree
+ * @param toiblock  The logical block
+ * @param stopp     Output value to tell whether the caller should
+ * stop deletion. Set to true when an extent starting beyond @toiblock is
+ * met or when an extent had to be trimmed; false when the leaf ran out of
+ * extents to delete.
+ * @return always 0; the results of the block freeing routine are not
+ * checked here. */
+static int
+ext4_extent_delete_leaf(struct ext4_inode_ref *inode_ref,
+			struct ext4_extent_path *path,
+			ext4_lblk_t toiblock,
+			bool *stopp)
+{
+	struct ext4_extent_path *leaf = path;
+	struct ext4_extent_header *hdr;
+	struct ext4_extent *ext;
+	u16int remaining;
+
+	*stopp = false;
+
+	for (;;) {
+		bool unwritten;
+		u16int pos, len, flen;
+		ext4_lblk_t startblk, endblk;
+		ext4_fsblk_t fblock;
+
+		hdr = leaf->header;
+		remaining = ext4_extent_header_get_nentries(hdr);
+		pos = leaf->extent - EXT4_EXTENT_FIRST(hdr);
+
+		assert(remaining > 0);
+
+		/* Snapshot the extent under the cursor */
+		ext = leaf->extent;
+		fblock = ext4_extent_get_fblock(ext);
+		startblk = ext4_extent_get_iblock(ext);
+		endblk = startblk + ext4_extent_get_nblocks(ext) - 1;
+		len = endblk - startblk + 1;
+		unwritten = EXT4_EXT_IS_UNWRITTEN(ext);
+
+		/* The extent starts beyond the range: nothing left to do */
+		if (toiblock < startblk) {
+			*stopp = true;
+			break;
+		}
+
+		if (toiblock < endblk) {
+			/* The range ends inside this extent: free its
+			 * leading part and shrink it in place */
+			flen = toiblock - startblk + 1;
+			ext4_dbg(DEBUG_EXTENT,
+				 DBG_INFO "Freeing: %llud:%uhd\n",
+				 fblock, flen);
+			ext4_balloc_free_blocks(inode_ref, fblock, flen);
+
+			fblock += flen;
+			startblk = toiblock + 1;
+			len = endblk - startblk + 1;
+			ext4_extent_set_iblock(ext, startblk);
+			ext4_extent_set_nblocks(ext, len, unwritten);
+			ext4_extent_set_fblock(ext, fblock);
+
+			ext4_extent_path_dirty(inode_ref, path, leaf->depth);
+
+			*stopp = true;
+			break;
+		}
+
+		/* The whole extent falls inside the range: drop it, then
+		 * release the blocks it mapped */
+		ext4_extent_delete_item(inode_ref, path, 0);
+		remaining--;
+
+		flen = len;
+		ext4_dbg(DEBUG_EXTENT,
+			 DBG_INFO "Freeing: %llud:%uhd\n",
+			 fblock, flen);
+		ext4_balloc_free_blocks(inode_ref, fblock, flen);
+
+		/* The cursor moved past the last remaining extent */
+		if (pos >= remaining)
+			break;
+	}
+
+	return 0;
+}
+
+/**@brief Remove the current index at specified level and free the
+ * node block that index refers to.
+ * @param inode_ref I-node
+ * @param path      Path in the extent tree
+ * @param depth     The level where deletion takes place at
+ * @return 0 if there is no error, or return values of blocks
+ * freeing routine. */
+static int
+ext4_extent_delete_node(struct ext4_inode_ref *inode_ref,
+			struct ext4_extent_path *path,
+			u16int depth)
+{
+	int ret;
+	ext4_fsblk_t fblock;
+	struct ext4_extent_index *idx;
+	struct ext4_extent_path *p;
+
+	/* If we leave nothing in the node after deletion of
+	 * an item, we free the block and delete the index
+	 * of the node. Get the respective key of the node
+	 * in the parent level */
+	p = path + depth;
+	assert(ext4_extent_header_get_nentries(p->header) > 0);
+	idx = p->index;
+	fblock = ext4_extent_index_get_fblock(idx);
+
+	/* Delete the index pointed to by the path. */
+	ext4_extent_delete_item(inode_ref, path, depth);
+
+	/* Free the block of it. */
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "Freeing: %llud:%uhd\n",
+		 fblock, 1);
+	ret = ext4_balloc_free_blocks(inode_ref, fblock, 1);
+
+	return ret;
+}
+
+/**@brief Delete the mapping in extent tree starting from \p fromiblock to
+ * \p toiblock inclusively.
+ * @param inode_ref  I-node
+ * @param fromiblock first logical block to be unmapped
+ * @param toiblock   last logical block to be unmapped
+ * @return 0 on success, also when there is no item to be deleted,
+ * otherwise return values of ext4_extent_find_extent(),
+ * ext4_extent_increment(), ext4_extent_insert(),
+ * ext4_extent_delete_leaf(), ext4_extent_delete_node() or
+ * ext4_extent_reload_paths().
+ */
+int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref,
+			     ext4_lblk_t fromiblock,
+			     ext4_lblk_t toiblock)
+{
+	int ret;
+	u16int nitems;
+	int rootdepth;
+	struct ext4_extent_header *hdr;
+	struct ext4_extent *ext;
+	ext4_lblk_t endiblock;
+	ext4_lblk_t startiblock;
+	struct ext4_extent_path *path, *p;
+
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+
+	ret = ext4_extent_find_extent(inode_ref, fromiblock, &path);
+	if (ret != 0)
+		return ret;
+
+	p = path;
+	hdr = p->header; USED(hdr);
+
+	/* We return 0 even if the whole extent tree is empty. */
+	if (!ext4_extent_header_get_nentries(path->header))
+		goto out;
+
+	/* Calculate the last logical block of the current extent. */
+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "At start of remove_space: \n");
+	ext4_extent_print_path(inode_ref, path);
+
+	ext = p->extent;
+	startiblock = ext4_extent_get_iblock(ext);
+	endiblock = startiblock + ext4_extent_get_nblocks(ext) - 1;
+
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "Extent: %ud:%ud\n",
+		 startiblock, endiblock);
+
+	if (fromiblock > endiblock) {
+		bool nonext;
+
+		/* The last logical block of the current extent is smaller
+		 * than the first logical block we are going to remove,
+		 * thus we move the path to the next extent. */
+		ret = ext4_extent_increment(inode_ref, path, &nonext);
+		if (ret != 0)
+			goto out;
+
+		/* The current extent is already the last extent in
+		 * the tree, so we just return success here. */
+		if (nonext)
+			goto out;
+	} else if (fromiblock > startiblock) {
+		bool unwritten;
+		u16int len;
+
+		/* @fromiblock is in the range of the current extent,
+		 * but does not sit right on the starting block.
+		 *
+		 * In this case we need to modify the current extent.
+		 * and free some blocks, since we do not really want
+		 * to remove and reinsert a new one. */
+
+		len = fromiblock - startiblock;
+		unwritten = EXT4_EXT_IS_UNWRITTEN(ext);
+		ext4_extent_set_nblocks(ext, len, unwritten);
+
+		ext4_extent_path_dirty(inode_ref, path, p->depth);
+
+		/* Free the range of blocks starting from @fromiblock
+		 * up to either @endiblock or @toiblock. */
+		if (toiblock < endiblock) {
+			u16int flen;
+			ext4_fsblk_t blocknr;
+			struct ext4_extent next;
+
+			/* In case we free up space inside an extent
+			 * while not touching both ends, we need to
+			 * unavoidably insert a new extent right after
+			 * the modified current extent, and that may
+			 * cause tree splitting. */
+
+			/* Now we need to free up space first. */
+			flen = toiblock - fromiblock + 1;
+			blocknr = ext4_extent_get_fblock(ext) + len;
+			ext4_dbg(DEBUG_EXTENT,
+				 DBG_INFO "Freeing: %llud:%uhd\n",
+				 blocknr, flen);
+			ext4_balloc_free_blocks(inode_ref, blocknr, flen);
+
+			blocknr += flen;
+			startiblock = fromiblock + flen;
+			len = endiblock - startiblock + 1;
+
+			ext4_extent_set_iblock(&next, startiblock);
+			ext4_extent_set_nblocks(&next, len, unwritten);
+			ext4_extent_set_fblock(&next, blocknr);
+			ret = ext4_extent_insert(inode_ref, path, &next);
+
+			/* After we free up the space and insert a new
+			 * extent, we are done. */
+			goto out;
+		} else {
+			bool nonext;
+			u16int flen;
+			ext4_fsblk_t blocknr;
+
+			/* Otherwise we do not need any insertion,
+			 * which also means that no extra space may be
+			 * allocated for tree splitting. */
+			flen = endiblock - fromiblock + 1;
+			blocknr = ext4_extent_get_fblock(ext) + len;
+
+			/* Now we need to free up space first. */
+			ext4_dbg(DEBUG_EXTENT,
+				 DBG_INFO "Freeing: %llud:%uhd\n",
+				 blocknr, flen);
+			ext4_balloc_free_blocks(inode_ref, blocknr, flen);
+
+			/* Increment the extent pointer to point to the
+			 * next extent. */
+			ret = ext4_extent_increment(inode_ref, path, &nonext);
+			if (ret != 0 || nonext)
+				goto out;
+		}
+	}
+
+	while (p <= path + rootdepth) {
+		struct ext4_extent_path *chldp;
+
+		hdr = p->header;
+
+		if (!p->depth) {
+			bool stop;
+
+			/* Delete as much extents as we can. */
+			ret = ext4_extent_delete_leaf(inode_ref,
+						      path,
+						      toiblock,
+						      &stop);
+			if (ret != 0)
+				goto out;
+
+			if (stop) {
+				/* Since the current extent has its logical
+				 * block number greater than @toiblock,
+				 * we are done. */
+				break;
+			}
+			/* Since there are no more items in the leaf,
+			 * we have to go one level above to switch to the
+			 * next leaf. */
+			p++;
+			continue;
+		}
+
+		chldp = p - 1;
+		nitems = ext4_extent_header_get_nentries(chldp->header);
+
+		/* Now we don't need the children path anymore. */
+		ext4_block_set(inode_ref->fs->bdev, &chldp->block);
+		if (!nitems) {
+			ret = ext4_extent_delete_node(inode_ref, path, p->depth);
+			if (ret != 0)
+				goto out;
+
+			if (p->index > EXT4_EXTENT_LAST_INDEX(hdr)) {
+				/* Go one level above */
+				p++;
+			} else {
+				ret = ext4_extent_reload_paths(inode_ref, path, p->depth, false);
+				if (ret != 0)
+					goto out;
+				/* Go to the bottom level (aka the leaf). */
+				p = path;
+			}
+		} else {
+			if (p->index == EXT4_EXTENT_LAST_INDEX(hdr)) {
+				/* Go one level above */
+				p++;
+			} else {
+				p->index++;
+				ret = ext4_extent_reload_paths(inode_ref, path, p->depth, false);
+				if (ret != 0)
+					goto out;
+				/* Go to the bottom level (aka the leaf). */
+				p = path;
+			}
+		}
+	}
+
+	/* The above code can only exit in either situations:
+	 *
+	 * 1. We found that there is no more extents at the right
+	 *    (p > path + rootdepth)
+	 * 2. We found that the next extent has key larger than @toiblock
+	 *    (p at leaf) */
+	assert(p == path || p > path + rootdepth);
+	if (p == path) {
+		/* We might have removed the leftmost key in the node,
+		 * so we need to update the first key of the right
+		 * sibling at every level until we meet a non-leftmost
+		 * key. */
+		ext4_extent_update_index(inode_ref, path, true);
+	} else {
+		/* Put loaded blocks. We won't double-release
+		 * in this case since the depth of tree will
+		 * be reset to 0. */
+		ext4_extent_path_release(inode_ref, path);
+
+		hdr = ext4_inode_get_extent_header(inode_ref->inode);
+		if (!ext4_extent_header_get_nentries(hdr)) {
+			/* For empty root we need to make sure that the
+			 * depth of the root level is 0. */
+			ext4_extent_header_set_nentries(hdr, 0);
+			ext4_extent_header_set_depth(hdr, 0);
+			inode_ref->dirty = true;
+		}
+	}
+
+out:
+	/* Put loaded blocks */
+	ext4_extent_path_release(inode_ref, path);
+
+	/* Destroy temporary data structure */
+	ext4_free(path);
+
+	return ret;
+}
+
+/**@brief Fill a run of physical blocks with zeroes
+ * @param inode_ref   I-node
+ * @param fblock      first physical block of the run
+ * @param nblocks     length of the run in blocks
+ * @return 0 on success, otherwise the first error met */
+static int ext4_extent_zero_fblocks(struct ext4_inode_ref *inode_ref,
+				    ext4_fsblk_t fblock,
+				    ext4_lblk_t nblocks)
+{
+	int rc;
+	ext4_lblk_t done;
+	struct ext4_blockdev *bdev = inode_ref->fs->bdev;
+	u32int bsize = ext4_sb_get_block_size(&inode_ref->fs->sb);
+
+	for (done = 0; done != nblocks; done++) {
+		struct ext4_block blk = EXT4_BLOCK_ZERO();
+		/* Get the block, clear it, mark it dirty, put it back */
+		rc = ext4_trans_block_get_noread(bdev, &blk, fblock + done);
+		if (rc != 0)
+			return rc;
+
+		memset(blk.data, 0, bsize);
+		ext4_trans_set_block_dirty(blk.buf);
+		rc = ext4_block_set(bdev, &blk);
+		if (rc != 0)
+			return rc;
+	}
+	return 0;
+}
+
+/**@brief Convert unwritten mapping to written one
+ * @param inode_ref   I-node
+ * @param path        Path in the extent tree, path[0] is at the extent
+ * @param iblock      starting logical block to be converted
+ * @param nblocks     number of blocks to be converted
+ * @return Error code */
+int ext4_extent_convert_written(struct ext4_inode_ref *inode_ref,
+				struct ext4_extent_path *path,
+				ext4_lblk_t iblock,
+				ext4_lblk_t nblocks)
+{
+	int ret;
+	ext4_lblk_t eiblock;
+	ext4_lblk_t enblocks;
+	ext4_fsblk_t efblock;
+	struct ext4_extent *ext;
+
+	ext = path[0].extent;
+	assert(ext);
+
+	eiblock = ext4_extent_get_iblock(ext);
+	enblocks = ext4_extent_get_nblocks(ext);
+	efblock = ext4_extent_get_fblock(ext);
+	assert(EXT4_EXTENT_IN_RANGE(iblock, eiblock, enblocks));
+
+	/* There are four cases we need to handle */
+	if (iblock == eiblock && nblocks == enblocks) {
+		/* Case 1: the whole extent has to be converted.
+		 * This is the simplest scenario. We just need
+		 * to mark the extent "written", and zero the
+		 * blocks covered by the extent */
+		ret = ext4_extent_zero_fblocks(inode_ref, efblock, enblocks);
+		if (ret != 0)
+			return ret;
+		EXT4_EXT_SET_WRITTEN(ext);
+		ext4_extent_path_dirty(inode_ref, path, 0);
+	} else if (iblock == eiblock) {
+		/* Case 2: convert the first part of the extent to written
+		 * and insert an unwritten extent after that */
+		ext4_lblk_t newiblock;
+		ext4_lblk_t newnblocks;
+		ext4_fsblk_t newfblock;
+		struct ext4_extent insext;
+
+		/* The new extent we are going to insert */
+		newiblock = eiblock + nblocks;
+		newnblocks = eiblock + enblocks - newiblock;
+		newfblock = efblock + nblocks;
+
+		/* Zero the blocks covered by the first part of the extent */
+		ret = ext4_extent_zero_fblocks(inode_ref,
+					       efblock + iblock - eiblock,
+					       nblocks);
+		if (ret != 0)
+			return ret;
+
+		/* Trim the current extent to the converted blocks and mark it written */
+		ext4_extent_set_nblocks(ext, nblocks, false);
+		ext4_extent_path_dirty(inode_ref, path, 0);
+
+		/* Insert the new extent */
+		ext4_extent_set_iblock(&insext, newiblock);
+		ext4_extent_set_nblocks(&insext, newnblocks, true);
+		ext4_extent_set_fblock(&insext, newfblock);
+		ret = ext4_extent_insert(inode_ref, path, &insext);
+		if (ret != 0)
+			/* In case when something happens during insertion
+			 * we restore the original unwritten extent */
+			ext4_extent_set_nblocks(ext, enblocks, true);
+	} else if (iblock + nblocks == eiblock + enblocks) {
+		/* Case 3: convert the second part of the extent to written.
+		 * We insert an written extent after the current extent */
+		ext4_lblk_t newiblock;
+		ext4_lblk_t newnblocks;
+		ext4_fsblk_t newfblock;
+		struct ext4_extent insext;
+
+		/* The new extent we are going to insert */
+		newiblock = iblock;
+		newnblocks = nblocks;
+		newfblock = efblock + iblock - eiblock;
+
+		/* Zero the blocks covered by the second part of the extent */
+		ret = ext4_extent_zero_fblocks(inode_ref, newfblock, newnblocks);
+		if (ret != 0)
+			return ret;
+
+		/* Trim the current extent */
+		ext4_extent_set_nblocks(ext, enblocks - nblocks, true);
+		ext4_extent_path_dirty(inode_ref, path, 0);
+
+		/* Insert the new extent */
+		ext4_extent_set_iblock(&insext, newiblock);
+		ext4_extent_set_nblocks(&insext, newnblocks, false);
+		ext4_extent_set_fblock(&insext, newfblock);
+		ret = ext4_extent_insert(inode_ref, path, &insext);
+		if (ret != 0)
+			/* In case when something happens during insertion
+			 * we restore the original length of the extent */
+			ext4_extent_set_nblocks(ext, enblocks, true);
+	} else {
+		/* Case 4: convert the middle part of the extent to written.
+		 * We insert one written extent, follow by an unwritten
+		 * extent */
+		ext4_lblk_t newiblock[2];
+		ext4_lblk_t newnblocks[2];
+		ext4_fsblk_t newfblock[2];
+		struct ext4_extent insext;
+
+		/* The new extents we are going to insert */
+		newiblock[0] = iblock;
+		newnblocks[0] = nblocks;
+		newfblock[0] = efblock + iblock - eiblock;
+		newiblock[1] = iblock + nblocks;
+		newnblocks[1] = eiblock + enblocks - newiblock[1];
+		newfblock[1] = newfblock[0] + nblocks;
+
+		/* Zero the blocks covered by the written extent */
+		ret = ext4_extent_zero_fblocks(inode_ref, newfblock[0],
+					       newnblocks[0]);
+		if (ret != 0)
+			return ret;
+
+		/* We don't want to fail in the middle because we
+		 * run out of space. From now on the subsequent
+		 * insertions cannot fail */
+		ret = ext4_extent_split(inode_ref, path, 2);
+		if (ret != 0)
+			return ret;
+
+		/* Trim the current extent */
+		ext4_extent_set_nblocks(ext,
+					enblocks - newnblocks[0] - newnblocks[1],
+					true);
+		ext4_extent_path_dirty(inode_ref, path, 0);
+
+		/* Insert the written extent first */
+		ext4_extent_set_iblock(&insext, newiblock[0]);
+		ext4_extent_set_nblocks(&insext, newnblocks[0], false);
+		ext4_extent_set_fblock(&insext, newfblock[0]);
+		ret = ext4_extent_insert(inode_ref, path, &insext);
+		assert(ret == 0);
+
+		/* Then insert the unwritten extent */
+		ext4_extent_set_iblock(&insext, newiblock[1]);
+		ext4_extent_set_nblocks(&insext , newnblocks[1], true);
+		ext4_extent_set_fblock(&insext, newfblock[1]);
+		ret = ext4_extent_insert(inode_ref, path, &insext);
+		assert(ret == 0);
+	}
+	return ret;
+}
+
+/**@brief Tell whether \p ext2 directly continues \p ext, so that the two
+ * can be merged by growing \p ext at its end
+ * @param ext  the extent to be grown
+ * @param ext2 the extent to be appended to \p ext
+ * @return true if the two extents can be merged, otherwise false */
+static bool ext4_extent_can_append(struct ext4_extent *ext,
+				   struct ext4_extent *ext2)
+{
+	ext4_lblk_t head_lblk;
+	ext4_lblk_t head_len;
+	ext4_fsblk_t head_pblk;
+	ext4_lblk_t tail_lblk;
+	ext4_lblk_t tail_len;
+	ext4_fsblk_t tail_pblk;
+	ext4_lblk_t max_len;
+
+	/* A written extent and an unwritten extent never merge */
+	if (EXT4_EXT_IS_UNWRITTEN(ext) != EXT4_EXT_IS_UNWRITTEN(ext2))
+		return false;
+
+	head_lblk = ext4_extent_get_iblock(ext);
+	head_len = ext4_extent_get_nblocks(ext);
+	head_pblk = ext4_extent_get_fblock(ext);
+	tail_lblk = ext4_extent_get_iblock(ext2);
+	tail_len = ext4_extent_get_nblocks(ext2);
+	tail_pblk = ext4_extent_get_fblock(ext2);
+
+	/* \p ext2 has to begin exactly where \p ext ends
+	 * in the logical block space ... */
+	if (head_lblk + head_len != tail_lblk)
+		return false;
+
+	/* ... and in the physical block space as well,
+	 * otherwise the two extents are not continuous */
+	if (head_pblk + head_len != tail_pblk)
+		return false;
+
+	/* Pick the length limit that applies to this kind of
+	 * extent.
+	 * Note: the maximum length of unwritten extent is shorter than
+	 * written extent by one block */
+	if (EXT4_EXT_IS_UNWRITTEN(ext))
+		max_len = EXT4_EXT_MAX_LEN_UNWRITTEN;
+	else
+		max_len = EXT4_EXT_MAX_LEN_WRITTEN;
+
+	/* The merged extent must fit into that limit */
+	if (head_len + tail_len > max_len)
+		return false;
+
+	return true;
+}
+
+/**@brief Tell whether \p ext2 ends exactly where \p ext begins, so that
+ * the two can be merged by growing \p ext at its start
+ * @param ext  the extent to be grown
+ * @param ext2 the extent to be prepended to \p ext
+ * @return true if the two extents can be merged, otherwise false */
+static bool ext4_extent_can_prepend(struct ext4_extent *ext,
+				    struct ext4_extent *ext2)
+{
+	ext4_lblk_t cur_lblk;
+	ext4_lblk_t cur_len;
+	ext4_fsblk_t cur_pblk;
+	ext4_lblk_t new_lblk;
+	ext4_lblk_t new_len;
+	ext4_fsblk_t new_pblk;
+	ext4_lblk_t max_len;
+
+	/* A written extent and an unwritten extent never merge */
+	if (EXT4_EXT_IS_UNWRITTEN(ext) != EXT4_EXT_IS_UNWRITTEN(ext2))
+		return false;
+
+	cur_lblk = ext4_extent_get_iblock(ext);
+	cur_len = ext4_extent_get_nblocks(ext);
+	cur_pblk = ext4_extent_get_fblock(ext);
+	new_lblk = ext4_extent_get_iblock(ext2);
+	new_len = ext4_extent_get_nblocks(ext2);
+	new_pblk = ext4_extent_get_fblock(ext2);
+
+	/* \p ext2 has to end exactly where \p ext begins
+	 * in the logical block space ... */
+	if (new_lblk + new_len != cur_lblk)
+		return false;
+
+	/* ... and in the physical block space as well,
+	 * otherwise the two extents are not continuous */
+	if (new_pblk + new_len != cur_pblk)
+		return false;
+
+	/* Pick the length limit that applies to this kind of
+	 * extent.
+	 * Note: the maximum length of unwritten extent is shorter than
+	 * written extent by one block */
+	if (EXT4_EXT_IS_UNWRITTEN(ext))
+		max_len = EXT4_EXT_MAX_LEN_UNWRITTEN;
+	else
+		max_len = EXT4_EXT_MAX_LEN_WRITTEN;
+
+	/* The merged extent must fit into that limit */
+	if (cur_len + new_len > max_len)
+		return false;
+
+	return true;
+}
+
+/**@brief Allocate up to \p nblocks physically continuous blocks
+ * @param inode_ref I-node
+ * @param goal      physical block allocation hint
+ * @param nblocks   number of blocks to be allocated
+ * @param fblockp   Output value - starting physical block number
+ * @param nblocksp  Output value - the number of blocks allocated
+ * @return Error code */
+static int
+ext4_extent_alloc_datablocks(struct ext4_inode_ref *inode_ref,
+			     ext4_fsblk_t goal,
+			     ext4_lblk_t nblocks,
+			     ext4_fsblk_t *fblockp,
+			     ext4_lblk_t *nblocksp)
+{
+	int ret = 0;
+	ext4_lblk_t i;
+	ext4_fsblk_t retfblock = 0;
+	ext4_lblk_t retnblocks = 0;
+
+	for (i = 0; i < nblocks; ++i, ++retnblocks) {
+		bool free = false;
+
+		if (!i) {
+			/* We allocate the first block by using
+			 * ext4_balloc_alloc_block() so that we
+			 * can pass allocation hint to the block
+			 * allocator */
+			ret = ext4_balloc_alloc_block(inode_ref,
+						      goal,
+						      &retfblock);
+			if (ret == 0)
+				free = true;
+		} else {
+			ext4_fsblk_t blockscnt;
+
+			/* Do a check to make sure that we won't look into
+			 * a block number larger than the total number of
+			 * blocks we have on this filesystem */
+			blockscnt = ext4_sb_get_blocks_cnt(&inode_ref->fs->sb);
+			if (retfblock + i < blockscnt) {
+				ret = ext4_balloc_try_alloc_block(inode_ref,
+				    retfblock + i, &free);
+			} else
+				free = false;
+		}
+
+		/* Stop trying on the next block if we encounter errors, or
+		 * if there is insufficient space, or if we can't allocate
+		 * blocks continuously */
+		if (ret != 0 || !free)
+			break;
+	}
+
+	if (ret == 0) {
+		*fblockp = retfblock;
+		if (nblocksp)
+			*nblocksp = retnblocks;
+	}
+	return ret;
+}
+
+/**@brief Extent-based blockmap manipulation
+ * @param inode_ref   I-node
+ * @param iblock      starting logical block of the inode
+ * @param max_nblocks maximum number of blocks to get from/allocate to blockmap
+ * @param resfblockp  return physical block address of the first block of an
+ * extent
+ * @param create      true if caller wants to insert mapping or convert
+ * unwritten mapping to written one
+ * @param resnblocksp return number of blocks in an extent (must be smaller than
+ * \p max_nblocks)
+ * @return Error code*/
+int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref,
+			   ext4_lblk_t iblock,
+			   ext4_lblk_t max_nblocks,
+			   ext4_fsblk_t *resfblockp,
+			   bool create,
+			   ext4_lblk_t *resnblocksp)
+{
+	int ret;
+	struct ext4_extent_path *path;
+	struct ext4_extent *ext;
+	struct ext4_extent insext;
+	ext4_lblk_t eiblock;
+	ext4_lblk_t enblocks;
+	ext4_fsblk_t efblock;
+	ext4_fsblk_t resfblock;
+	ext4_lblk_t resnblocks = 0;
+	ext4_fsblk_t goal;
+
+	/* Seek to the corresponding extent */
+	ret = ext4_extent_find_extent(inode_ref, iblock, &path);
+	if (ret != 0)
+		return ret;
+
+	ext = path[0].extent;
+	if (ext) {
+		/* The extent tree is not empty */
+		eiblock = ext4_extent_get_iblock(ext);
+		enblocks = ext4_extent_get_nblocks(ext);
+		efblock = ext4_extent_get_fblock(ext);
+		if (EXT4_EXTENT_IN_RANGE(iblock, eiblock, enblocks)) {
+			/* The extent exists and logical block requested falls
+			 * into the range of the extent */
+			resfblock = efblock + iblock - eiblock;
+			resnblocks = eiblock + enblocks - iblock;
+
+			/* Trim the result if it is larger than the maximum
+			 * length the caller wants */
+			if (resnblocks > max_nblocks)
+				resnblocks = max_nblocks;
+
+			if (EXT4_EXT_IS_UNWRITTEN(ext)) {
+				if (create)
+					/* Convert the extent to written extent
+					 * if the extent is unwritten extent */
+					ret = ext4_extent_convert_written(inode_ref,
+									  path,
+									  iblock,
+									  resnblocks);
+				else
+					/* We are not asked to modify the blockmap
+					 * so we just return a hole */
+					resfblock = 0;
+			}
+			goto cleanup;
+		}
+		if (!create) {
+			/* We are not asked to insert mapping: don't look
+			 * for the next extent, just return a hole */
+			resfblock = 0;
+			resnblocks = 1;
+			goto cleanup;
+		}
+		if (ext4_extent_get_iblock(ext) < iblock) {
+			/* The current extent starts before @iblock: seek to
+			 * the next extent to learn how many blocks can be
+			 * allocated without running into it */
+			bool nonext;
+
+			/* Go to the next extent */
+			ret = ext4_extent_increment(inode_ref, path, &nonext);
+			if (ret != 0)
+				goto cleanup;
+
+			if (!nonext) {
+				/* We successfully reach the next extent */
+				bool noprev;
+				ext4_lblk_t neiblock;
+
+				ext = path[0].extent;
+
+				/* The next extent must start at greater logical
+				 * block number */
+				assert(ext4_extent_get_iblock(ext) > iblock);
+
+				/* Clamp the allocation so that it does not
+				 * overlap with the next extent */
+				neiblock = ext4_extent_get_iblock(ext);
+				if (max_nblocks > neiblock - iblock)
+					max_nblocks = neiblock - iblock;
+
+				/* Go back to the previous extent */
+				ret = ext4_extent_decrement(inode_ref, path,
+							    &noprev);
+				if (ret != 0)
+					goto cleanup;
+				assert(!noprev);
+				ext = path[0].extent;
+			}
+		} else if (eiblock > iblock &&
+			   max_nblocks > eiblock - iblock) {
+			/* The extent found starts after @iblock: do
+			 * not let the new mapping run into it */
+			max_nblocks = eiblock - iblock;
+		}
+	}
+
+	/* Return a hole if we are not asked to insert mapping */
+	if (!create) {
+		resfblock = 0;
+		resnblocks = 1;
+		goto cleanup;
+	}
+
+	/* Multiple data blocks allocation */
+	goal = ext4_extent_data_alloc_goal(inode_ref, path, iblock);
+	ret = ext4_extent_alloc_datablocks(inode_ref, goal, max_nblocks,
+					   &resfblock, &max_nblocks);
+	if (ret != 0)
+		goto cleanup;
+
+	ext4_extent_set_iblock(&insext, iblock);
+	ext4_extent_set_nblocks(&insext, max_nblocks, false);
+	ext4_extent_set_fblock(&insext, resfblock);
+
+	if (ext && ext4_extent_can_append(ext, &insext)) {
+		/* Clang won't complain, it's just to make gcc happy */
+		enblocks = ext4_extent_get_nblocks(ext);
+
+		/* If we can append this extent to the current extent */
+		ext4_extent_set_nblocks(ext, enblocks + max_nblocks,
+					EXT4_EXT_IS_UNWRITTEN(ext));
+
+		ext4_extent_path_dirty(inode_ref, path, 0);
+	} else if (ext && ext4_extent_can_prepend(ext, &insext)) {
+		/* Clang won't complain, it's just to make gcc happy */
+		enblocks = ext4_extent_get_nblocks(ext);
+
+		/* If we can prepend this extent to the current extent */
+		ext4_extent_set_iblock(ext, iblock);
+		ext4_extent_set_nblocks(ext, enblocks + max_nblocks,
+					EXT4_EXT_IS_UNWRITTEN(ext));
+		ext4_extent_set_fblock(ext, resfblock);
+
+		/* If we are working on the first extent in the
+		 * first leaf (in case we are actually prepending
+		 * mappings) we need to update the index of nodes.
+		 *
+		 * NOTE: Since we don't seek to the next extent and
+		 * try to modify it, prepending should not happen at
+		 * any leaves except the first extent of the first leaf */
+		ext4_extent_update_index(inode_ref, path, false);
+		ext4_extent_path_dirty(inode_ref, path, 0);
+	} else {
+		/* Finally, insert a new extent into the extent tree */
+		ret = ext4_extent_insert(inode_ref, path, &insext);
+		if (ret != 0)
+			ext4_balloc_free_blocks(inode_ref, resfblock,
+						max_nblocks);
+	}
+
+	resnblocks = max_nblocks;
+
+cleanup:
+	/* Put loaded blocks */
+	ext4_extent_path_release(inode_ref, path);
+
+	/* Destroy temporary data structure */
+	ext4_free(path);
+
+	if (ret == 0) {
+		if (resfblockp)
+			*resfblockp = resfblock;
+		if (resnblocksp)
+			*resnblocksp = resnblocks;
+	}
+
+	return ret;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_fs.c
@@ -1,0 +1,1699 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_trans.h"
+#include "ext4_fs.h"
+#include "ext4_blockdev.h"
+#include "ext4_super.h"
+#include "ext4_crc32.h"
+#include "ext4_block_group.h"
+#include "ext4_balloc.h"
+#include "ext4_bitmap.h"
+#include "ext4_inode.h"
+#include "ext4_ialloc.h"
+#include "ext4_extent.h"
+
+/* Mount-time setup of `fs` on top of `bdev`: reads and checks the superblock,
+ * vets the feature flags and precomputes the indirect-block limits.  Unless
+ * the mount is read-only, the superblock is written back with the ERROR_FS
+ * state set.  Returns 0 on success, non-zero on failure. */
+int ext4_fs_init(struct ext4_fs *fs, struct ext4_blockdev *bdev,
+		 bool read_only)
+{
+	int r, i;
+	u16int tmp;
+	u32int bsize;
+
+	assert(fs && bdev);
+
+	fs->bdev = bdev;
+	fs->read_only = read_only;
+
+	r = ext4_sb_read(fs->bdev, &fs->sb);
+	if (r != 0)
+		return r;
+
+	if (!ext4_sb_check(&fs->sb)) {
+		werrstr("superblock: %r");
+		return -1;
+	}
+
+	bsize = ext4_sb_get_block_size(&fs->sb);
+	if (bsize > EXT4_MAX_BLOCK_SIZE) {
+		werrstr("invalid block size: %ud", bsize);
+		return -1;
+	}
+
+	/* An unsupported read-only-compatible feature forces a read-only mount */
+	r = ext4_fs_check_features(fs, &read_only);
+	if (r != 0)
+		return r;
+	if (read_only)
+		fs->read_only = read_only;
+
+	/* Compute limits for indirect block levels */
+	u32int blocks_id = bsize / sizeof(u32int);
+	fs->inode_block_limits[0] = EXT4_INODE_DIRECT_BLOCK_COUNT;
+	fs->inode_blocks_per_level[0] = 1;
+	for (i = 1; i < 4; i++) {
+		fs->inode_blocks_per_level[i] =
+		    fs->inode_blocks_per_level[i - 1] * blocks_id;
+		fs->inode_block_limits[i] = fs->inode_block_limits[i - 1] +
+					    fs->inode_blocks_per_level[i];
+	}
+
+	/* Warn when the superblock still carries the error flag */
+	tmp = ext4_get16(&fs->sb, state);
+	if (tmp & EXT4_SUPERBLOCK_STATE_ERROR_FS)
+		ext4_dbg(DEBUG_FS, DBG_WARN
+				"last umount error: superblock fs_error flag\n");
+
+	if (!fs->read_only) {
+		/* Mark system as mounted */
+		ext4_set16(&fs->sb, state, EXT4_SUPERBLOCK_STATE_ERROR_FS);
+		r = ext4_sb_write(fs->bdev, &fs->sb);
+		if (r != 0)
+			return r;
+
+		/* Updated in memory only; nothing here writes it back */
+		ext4_set16(&fs->sb, mount_count, ext4_get16(&fs->sb, mount_count) + 1);
+	}
+
+	return r;
+}
+
+/* Unmount-time teardown: set the superblock state to VALID_FS and, unless the
+ * filesystem is read-only, write it back (returning ext4_sb_write's result). */
+int ext4_fs_fini(struct ext4_fs *fs)
+{
+	assert(fs);
+
+	ext4_set16(&fs->sb, state, EXT4_SUPERBLOCK_STATE_VALID_FS);
+	if (fs->read_only)
+		return 0;
+
+	return ext4_sb_write(fs->bdev, &fs->sb);
+}
+
+static void ext4_fs_debug_features_inc(u32int features_incompatible)
+{	/* Emit one debug line per INCOMPAT feature bit set in the mask */
+	if (features_incompatible & EXT4_FINCOM_COMPRESSION)
+		ext4_dbg(DEBUG_FS, DBG_NONE "compression\n");
+	if (features_incompatible & EXT4_FINCOM_FILETYPE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "filetype\n");
+	if (features_incompatible & EXT4_FINCOM_RECOVER)
+		ext4_dbg(DEBUG_FS, DBG_NONE "recover\n");
+	if (features_incompatible & EXT4_FINCOM_JOURNAL_DEV)
+		ext4_dbg(DEBUG_FS, DBG_NONE "journal_dev\n");
+	if (features_incompatible & EXT4_FINCOM_META_BG)
+		ext4_dbg(DEBUG_FS, DBG_NONE "meta_bg\n");
+	if (features_incompatible & EXT4_FINCOM_EXTENTS)
+		ext4_dbg(DEBUG_FS, DBG_NONE "extents\n");
+	if (features_incompatible & EXT4_FINCOM_64BIT)
+		ext4_dbg(DEBUG_FS, DBG_NONE "64bit\n");
+	if (features_incompatible & EXT4_FINCOM_MMP)
+		ext4_dbg(DEBUG_FS, DBG_NONE "mmp\n");
+	if (features_incompatible & EXT4_FINCOM_FLEX_BG)
+		ext4_dbg(DEBUG_FS, DBG_NONE "flex_bg\n");
+	if (features_incompatible & EXT4_FINCOM_EA_INODE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "ea_inode\n");
+	if (features_incompatible & EXT4_FINCOM_DIRDATA)
+		ext4_dbg(DEBUG_FS, DBG_NONE "dirdata\n");
+	if (features_incompatible & EXT4_FINCOM_BG_USE_META_CSUM)
+		ext4_dbg(DEBUG_FS, DBG_NONE "meta_csum\n");
+	if (features_incompatible & EXT4_FINCOM_LARGEDIR)
+		ext4_dbg(DEBUG_FS, DBG_NONE "largedir\n");
+	if (features_incompatible & EXT4_FINCOM_INLINE_DATA)
+		ext4_dbg(DEBUG_FS, DBG_NONE "inline_data\n");
+}
+static void ext4_fs_debug_features_comp(u32int features_compatible)
+{	/* Emit one debug line per COMPAT feature bit set in the mask */
+	if (features_compatible & EXT4_FCOM_DIR_PREALLOC)
+		ext4_dbg(DEBUG_FS, DBG_NONE "dir_prealloc\n");
+	if (features_compatible & EXT4_FCOM_IMAGIC_INODES)
+		ext4_dbg(DEBUG_FS, DBG_NONE "imagic_inodes\n");
+	if (features_compatible & EXT4_FCOM_HAS_JOURNAL)
+		ext4_dbg(DEBUG_FS, DBG_NONE "has_journal\n");
+	if (features_compatible & EXT4_FCOM_EXT_ATTR)
+		ext4_dbg(DEBUG_FS, DBG_NONE "ext_attr\n");
+	if (features_compatible & EXT4_FCOM_RESIZE_INODE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "resize_inode\n");
+	if (features_compatible & EXT4_FCOM_DIR_INDEX)
+		ext4_dbg(DEBUG_FS, DBG_NONE "dir_index\n");
+}
+
+static void ext4_fs_debug_features_ro(u32int features_ro)
+{	/* Emit one debug line per RO_COMPAT feature bit set in the mask */
+	if (features_ro & EXT4_FRO_COM_SPARSE_SUPER)
+		ext4_dbg(DEBUG_FS, DBG_NONE "sparse_super\n");
+	if (features_ro & EXT4_FRO_COM_LARGE_FILE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "large_file\n");
+	if (features_ro & EXT4_FRO_COM_BTREE_DIR)
+		ext4_dbg(DEBUG_FS, DBG_NONE "btree_dir\n");
+	if (features_ro & EXT4_FRO_COM_HUGE_FILE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "huge_file\n");
+	if (features_ro & EXT4_FRO_COM_GDT_CSUM)
+		ext4_dbg(DEBUG_FS, DBG_NONE "gdt_csum\n");
+	if (features_ro & EXT4_FRO_COM_DIR_NLINK)
+		ext4_dbg(DEBUG_FS, DBG_NONE "dir_nlink\n");
+	if (features_ro & EXT4_FRO_COM_EXTRA_ISIZE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "extra_isize\n");
+	if (features_ro & EXT4_FRO_COM_QUOTA)
+		ext4_dbg(DEBUG_FS, DBG_NONE "quota\n");
+	if (features_ro & EXT4_FRO_COM_BIGALLOC)
+		ext4_dbg(DEBUG_FS, DBG_NONE "bigalloc\n");
+	if (features_ro & EXT4_FRO_COM_METADATA_CSUM)
+		ext4_dbg(DEBUG_FS, DBG_NONE "metadata_csum\n");
+}
+
+/* Log the superblock feature words and vet them.  An unsupported INCOMPAT
+ * bit fails the call with -1; an unsupported RO_COMPAT bit only sets
+ * *read_only.  In all other cases *read_only is cleared and 0 returned. */
+int ext4_fs_check_features(struct ext4_fs *fs, bool *read_only)
+{
+	assert(fs && read_only);
+	u32int incompat, ro_compat, bad;
+
+	if (ext4_get32(&fs->sb, rev_level) == 0) {
+		*read_only = false;
+		return 0;
+	}
+
+	incompat = ext4_get32(&fs->sb, features_incompatible);
+	ro_compat = ext4_get32(&fs->sb, features_read_only);
+
+	ext4_dbg(DEBUG_FS, DBG_INFO "sblock features_incompatible:\n");
+	ext4_fs_debug_features_inc(incompat);
+	ext4_dbg(DEBUG_FS, DBG_INFO "sblock features_compatible:\n");
+	ext4_fs_debug_features_comp(ext4_get32(&fs->sb, features_compatible));
+	ext4_dbg(DEBUG_FS, DBG_INFO "sblock features_read_only:\n");
+	ext4_fs_debug_features_ro(ro_compat);
+
+	/* Any INCOMPAT bit that is neither supported nor ignorable is fatal */
+	bad = incompat & ~(EXT4_SUPPORTED_FINCOM | EXT_FINCOM_IGNORED);
+	if (bad != 0) {
+		ext4_dbg(DEBUG_FS, DBG_ERROR
+				"sblock has unsupported features incompatible:\n");
+		ext4_fs_debug_features_inc(bad);
+		werrstr("unsupported features");
+		return -1;
+	}
+
+	/* Unknown RO_COMPAT bits are tolerated, but only for reading */
+	bad = ro_compat & ~EXT4_SUPPORTED_FRO_COM;
+	if (bad != 0) {
+		ext4_dbg(DEBUG_FS, DBG_WARN
+			"sblock has unsupported features read only:\n");
+		ext4_fs_debug_features_ro(bad);
+	}
+	*read_only = (bad != 0);
+	return 0;
+}
+
+/**@brief Determine whether the block is inside the group.
+ * @param s       superblock
+ * @param baddr   block address
+ * @param bgid    block group id
+ * @return true if baddr maps to block group bgid, false otherwise
+ */
+static bool ext4_block_in_group(struct ext4_sblock *s, ext4_fsblk_t baddr,
+				u32int bgid)
+{
+	/* Ask the allocator which group the block falls into, then compare */
+	u32int owner = ext4_balloc_get_bgid_of_block(s, baddr);
+
+	return owner == bgid;
+}
+
+/**@brief   Fill the tail of a bitmap with ones, beginning at start_bit.
+ *          Bits are set individually only up to the next byte boundary;
+ *          from there on (end_bit - boundary) / 8 whole bytes are memset.
+ */
+static void ext4_fs_mark_bitmap_end(int start_bit, int end_bit, void *bitmap)
+{
+	int bit;
+
+	if (start_bit >= end_bit)
+		return;
+
+	for (bit = start_bit; (unsigned)bit < ((start_bit + 7) & ~7UL); bit++)
+		ext4_bmap_bit_set(bitmap, bit);
+	if (bit >= end_bit)
+		return;
+	memset((char *)bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
+}
+
+/**@brief Build the block bitmap of a block group from scratch.
+ * @param bg_ref Reference to block group (its dirty flag gets set)
+ * @return Result of ext4_block_set, or the error from fetching the bitmap block
+ */
+static int ext4_fs_init_block_bitmap(struct ext4_block_group_ref *bg_ref)
+{
+	struct ext4_sblock *sb = &bg_ref->fs->sb;
+	struct ext4_bgroup *bg = bg_ref->block_group;
+	int rc;
+
+	u32int bit, bit_max;
+	u32int group_blocks;
+	u16int inode_size = ext4_get16(sb, inode_size);
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u32int inodes_per_group = ext4_get32(sb, inodes_per_group);
+
+	ext4_fsblk_t i;
+	ext4_fsblk_t bmp_blk = ext4_bg_get_block_bitmap(bg, sb);
+	ext4_fsblk_t bmp_inode = ext4_bg_get_inode_bitmap(bg, sb);
+	ext4_fsblk_t inode_table = ext4_bg_get_inode_table_first_block(bg, sb);
+	ext4_fsblk_t first_bg = ext4_balloc_get_block_of_bgid(sb, bg_ref->index);
+
+	u32int dsc_per_block =  block_size / ext4_sb_get_desc_size(sb);
+
+	bool flex_bg = ext4_sb_feature_incom(sb, EXT4_FINCOM_FLEX_BG);
+	bool meta_bg = ext4_sb_feature_incom(sb, EXT4_FINCOM_META_BG);
+
+	u32int inode_table_bcnt = inodes_per_group * inode_size / block_size;
+
+	struct ext4_block block_bitmap;
+	rc = ext4_trans_block_get_noread(bg_ref->fs->bdev, &block_bitmap, bmp_blk);
+	if (rc != 0)
+		return rc;
+
+	memset(block_bitmap.data, 0, block_size);
+	bit_max = ext4_sb_is_super_in_bg(sb, bg_ref->index);
+
+	u32int count = ext4_sb_first_meta_bg(sb) * dsc_per_block;
+	if (!meta_bg || bg_ref->index < count) {
+		if (bit_max) {
+			bit_max += ext4_bg_num_gdb(sb, bg_ref->index);
+			bit_max += ext4_get16(sb, s_reserved_gdt_blocks);
+		}
+	} else { /* For META_BG_BLOCK_GROUPS */
+		bit_max += ext4_bg_num_gdb(sb, bg_ref->index);
+	}
+	for (bit = 0; bit < bit_max; bit++)
+		ext4_bmap_bit_set(block_bitmap.data, bit);
+
+	if (bg_ref->index == ext4_block_group_cnt(sb) - 1) {
+		/*
+		 * Even though mke2fs always initializes the first and last
+		 * group, some other tool may have set EXT4_BG_BLOCK_UNINIT:
+		 * the last group can be shorter than blocks_per_group.
+		 */
+
+		group_blocks = (u32int)(ext4_sb_get_blocks_cnt(sb) -
+					  ext4_get32(sb, first_data_block) -
+					  ext4_get32(sb, blocks_per_group) *
+					  (ext4_block_group_cnt(sb) - 1));
+	} else {
+		group_blocks = ext4_get32(sb, blocks_per_group);
+	}
+
+	bool in_bg;
+	in_bg = ext4_block_in_group(sb, bmp_blk, bg_ref->index);
+	if (!flex_bg || in_bg)
+		ext4_bmap_bit_set(block_bitmap.data,
+				  (u32int)(bmp_blk - first_bg));
+
+	in_bg = ext4_block_in_group(sb, bmp_inode, bg_ref->index);
+	if (!flex_bg || in_bg)
+		ext4_bmap_bit_set(block_bitmap.data,
+				  (u32int)(bmp_inode - first_bg));
+
+        for (i = inode_table; i < inode_table + inode_table_bcnt; i++) {
+		in_bg = ext4_block_in_group(sb, i, bg_ref->index);
+		if (!flex_bg || in_bg)
+			ext4_bmap_bit_set(block_bitmap.data,
+					  (u32int)(i - first_bg));
+	}
+        /*
+         * The bitmap always spans block_size * 8 bits.  If the group
+         * holds fewer blocks than that, set the bits beyond the end
+         * of the group to 1.
+         */
+        ext4_fs_mark_bitmap_end(group_blocks, block_size * 8, block_bitmap.data);
+	ext4_trans_set_block_dirty(block_bitmap.buf);
+
+	ext4_balloc_set_bitmap_csum(sb, bg_ref->block_group, block_bitmap.data);
+	bg_ref->dirty = true;
+
+	/* Hand the bitmap block back; its result is the return value */
+	return ext4_block_set(bg_ref->fs->bdev, &block_bitmap);
+}
+
+/**@brief Initialize i-node bitmap in block group.
+ *        The bits of the group's i-nodes are cleared and the unused tail
+ *        of the bitmap block is filled with ones.
+ * @param bg_ref Reference to block group (its dirty flag gets set)
+ * @return 0 on success, otherwise the non-zero result of the failing
+ *         block call
+ */
+static int ext4_fs_init_inode_bitmap(struct ext4_block_group_ref *bg_ref)
+{
+	int rc;
+	struct ext4_sblock *sb = &bg_ref->fs->sb;
+	struct ext4_bgroup *bg = bg_ref->block_group;
+
+	/* Locate the bitmap block */
+	ext4_fsblk_t bitmap_block_addr = ext4_bg_get_inode_bitmap(bg, sb);
+
+	struct ext4_block b;
+	rc = ext4_trans_block_get_noread(bg_ref->fs->bdev, &b, bitmap_block_addr);
+	if (rc != 0)
+		return rc;
+
+	/* Initialize all bitmap bits to zero */
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u32int inodes_per_group = ext4_get32(sb, inodes_per_group);
+
+	memset(b.data, 0, (inodes_per_group + 7) / 8);
+
+	/*
+	 * Bits past the last i-node of the group do not map to anything:
+	 * set them.  This is the same tail fill the block bitmap needs,
+	 * so share ext4_fs_mark_bitmap_end() instead of repeating it.
+	 */
+	ext4_fs_mark_bitmap_end(inodes_per_group, block_size * 8, b.data);
+
+	ext4_trans_set_block_dirty(b.buf);
+
+	ext4_ialloc_set_bitmap_csum(sb, bg, b.data);
+	bg_ref->dirty = true;
+
+	/* Save bitmap */
+	return ext4_block_set(bg_ref->fs->bdev, &b);
+}
+
+/**@brief Zero-fill the i-node table of a block group.
+ * @param bg_ref Reference to block group
+ * @return 0 on success, otherwise the first non-zero block call result
+ */
+static int ext4_fs_init_inode_table(struct ext4_block_group_ref *bg_ref)
+{
+	struct ext4_sblock *sb = &bg_ref->fs->sb;
+	struct ext4_bgroup *bg = bg_ref->block_group;
+	struct ext4_block blk;
+	ext4_fsblk_t cur, first, last;
+	int rc;
+
+	u32int inode_size = ext4_get16(sb, inode_size);
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u32int per_block = block_size / inode_size;
+	u32int in_group = ext4_inodes_in_group_cnt(sb, bg_ref->index);
+
+	/* Number of table blocks, rounded up to cover a partial last block */
+	u32int nblocks = in_group / per_block;
+	if (in_group % per_block != 0)
+		nblocks++;
+
+	/* Inclusive range of blocks to wipe */
+	first = ext4_bg_get_inode_table_first_block(bg, sb);
+	last = first + nblocks - 1;
+
+	for (cur = first; cur <= last; cur++) {
+		rc = ext4_trans_block_get_noread(bg_ref->fs->bdev, &blk, cur);
+		if (rc != 0)
+			return rc;
+
+		memset(blk.data, 0, block_size);
+		ext4_trans_set_block_dirty(blk.buf);
+
+		rc = ext4_block_set(bg_ref->fs->bdev, &blk);
+		if (rc != 0)
+			return rc;
+	}
+
+	return 0;
+}
+
+/* Locate the block that stores the descriptor of group `bgid`.  With META_BG
+ * the block lives in the first group its descriptors cover, not in `bgid`. */
+static ext4_fsblk_t ext4_fs_get_descriptor_block(struct ext4_sblock *s,
+					     u32int bgid,
+					     u32int dsc_per_block)
+{
+	u32int dsc_id = bgid / dsc_per_block;
+	u32int home_bg = dsc_id * dsc_per_block;
+	bool meta_bg = ext4_sb_feature_incom(s, EXT4_FINCOM_META_BG);
+	int has_super = 0;
+
+	/* Classic layout: descriptor blocks directly follow the superblock */
+	if (!meta_bg || dsc_id < ext4_sb_first_meta_bg(s))
+		return ext4_get32(s, first_data_block) + dsc_id + 1;
+
+	if (ext4_sb_is_super_in_bg(s, home_bg))
+		has_super = 1;
+	return (has_super + ext4_fs_first_bg_block_no(s, home_bg));
+}
+
+/**@brief  Compute checksum of block group descriptor.
+ * @param sb   Superblock
+ * @param bgid Index of block group in the filesystem
+ * @param bg   Block group to compute checksum for (checksum field restored)
+ * @return Low 16 bits of crc32c (metadata_csum), crc16 (gdt_csum), else 0
+ */
+static u16int ext4_fs_bg_checksum(struct ext4_sblock *sb, u32int bgid,
+				    struct ext4_bgroup *bg)
+{
+	/* If checksum not supported, 0 will be returned */
+	u16int crc = 0;
+
+	/* metadata_csum takes precedence over gdt_csum when both are set */
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		/* Use metadata_csum algorithm instead */
+		u32int le32_bgid = to_le32(bgid);
+		u32int orig_checksum, checksum;
+
+		/* Preparation: temporarily set bg checksum to 0 */
+		orig_checksum = bg->checksum;
+		bg->checksum = 0;
+
+		/* First calculate crc32c checksum against fs uuid */
+		checksum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid,
+				sizeof(sb->uuid));
+		/* Then calculate crc32c checksum against bgid */
+		checksum = ext4_crc32c(checksum, &le32_bgid, sizeof(bgid));
+		/* Finally calculate crc32c checksum against block_group_desc */
+		checksum = ext4_crc32c(checksum, bg, ext4_sb_get_desc_size(sb));
+		bg->checksum = orig_checksum;
+
+		crc = checksum & 0xFFFF;
+		return crc;
+	}
+
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_GDT_CSUM)) {
+		u8int *base = (u8int *)bg;
+		u8int *checksum = (u8int *)&bg->checksum;
+
+		u32int offset = (u32int)(checksum - base);
+
+		/* Convert block group index to little endian */
+		u32int group = to_le32(bgid);
+
+		/* Initialization */
+		crc = ext4_bg_crc16(~0, sb->uuid, sizeof(sb->uuid));
+
+		/* Include index of block group */
+		crc = ext4_bg_crc16(crc, (u8int *)&group, sizeof(group));
+
+		/* Compute crc from the first part of the descriptor, i.e.
+		 * every byte that precedes the checksum field */
+		crc = ext4_bg_crc16(crc, (u8int *)bg, offset);
+
+		/* Skip checksum */
+		offset += sizeof(bg->checksum);
+
+		/* Checksum of the rest of block group descriptor */
+		if ((ext4_sb_feature_incom(sb, EXT4_FINCOM_64BIT)) &&
+		    (offset < ext4_sb_get_desc_size(sb))) {
+
+			const u8int *start = ((u8int *)bg) + offset;
+			usize len = ext4_sb_get_desc_size(sb) - offset;
+			crc = ext4_bg_crc16(crc, start, len);
+		}
+	}
+	return crc;
+}
+
+/* Compare the stored descriptor checksum; always true without metadata_csum. */
+static bool ext4_fs_verify_bg_csum(struct ext4_sblock *sb, u32int bgid,
+				   struct ext4_bgroup *bg)
+{
+	u16int stored = to_le16(bg->checksum);
+
+	return !ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM) ||
+	       ext4_fs_bg_checksum(sb, bgid, bg) == stored;
+}
+
+/* Load the descriptor of block group `bgid` into `ref`.  A checksum mismatch
+ * is only logged.  Groups flagged BLOCK_UNINIT / INODE_UNINIT get their bitmaps
+ * (and, unless ITABLE_ZEROED, the inode table) initialized here.  Returns 0 on
+ * success; on failure the descriptor block has already been released. */
+int ext4_fs_get_block_group_ref(struct ext4_fs *fs, u32int bgid,
+				struct ext4_block_group_ref *ref)
+{
+	/* Compute number of descriptors, that fits in one data block */
+	u32int block_size = ext4_sb_get_block_size(&fs->sb);
+	u32int dsc_cnt = block_size / ext4_sb_get_desc_size(&fs->sb);
+
+	/* Block that holds this group's descriptor, and the offset inside it */
+	u64int block_id = ext4_fs_get_descriptor_block(&fs->sb, bgid, dsc_cnt);
+	u32int offset = (bgid % dsc_cnt) * ext4_sb_get_desc_size(&fs->sb);
+
+	int rc = ext4_trans_block_get(fs->bdev, &ref->block, block_id);
+	if (rc != 0)
+		return rc;
+
+	ref->block_group = (void *)(ref->block.data + offset);
+	ref->fs = fs;
+	ref->index = bgid;
+	ref->dirty = false;
+	struct ext4_bgroup *bg = ref->block_group;
+
+	if (!ext4_fs_verify_bg_csum(&fs->sb, bgid, bg)) {
+		ext4_dbg(DEBUG_FS, DBG_WARN
+			 "Block group descriptor checksum failed. "
+			 "Block group index: %ud\n", bgid);
+	}
+
+	if (ext4_bg_has_flag(bg, EXT4_BLOCK_GROUP_BLOCK_UNINIT)) {
+		rc = ext4_fs_init_block_bitmap(ref);
+		if (rc != 0) {
+			ext4_block_set(fs->bdev, &ref->block);
+			return rc;
+		}
+		ext4_bg_clear_flag(bg, EXT4_BLOCK_GROUP_BLOCK_UNINIT);
+		ref->dirty = true;
+	}
+
+	if (ext4_bg_has_flag(bg, EXT4_BLOCK_GROUP_INODE_UNINIT)) {
+		rc = ext4_fs_init_inode_bitmap(ref);
+		if (rc != 0) {
+			ext4_block_set(fs->bdev, &ref->block);
+			return rc;
+		}
+
+		ext4_bg_clear_flag(bg, EXT4_BLOCK_GROUP_INODE_UNINIT);
+
+		if (!ext4_bg_has_flag(bg, EXT4_BLOCK_GROUP_ITABLE_ZEROED)) {
+			rc = ext4_fs_init_inode_table(ref);
+			if (rc != 0) {
+				ext4_block_set(fs->bdev, &ref->block);
+				return rc;
+			}
+			ext4_bg_set_flag(bg, EXT4_BLOCK_GROUP_ITABLE_ZEROED);
+		}
+
+		ref->dirty = true;
+	}
+
+	return 0;
+}
+
+/* Release a block group reference.  If it was modified, the descriptor
+ * checksum is recomputed and the descriptor block marked dirty first.
+ * Returns the result of ext4_block_set(). */
+int ext4_fs_put_block_group_ref(struct ext4_block_group_ref *ref)
+{
+	struct ext4_bgroup *bg = ref->block_group;
+	u16int csum;
+
+	if (ref->dirty) {
+		/* Refresh the stored checksum, then flag the block for writing */
+		csum = ext4_fs_bg_checksum(&ref->fs->sb, ref->index, bg);
+		bg->checksum = to_le16(csum);
+		ext4_trans_set_block_dirty(ref->block.buf);
+	}
+
+	return ext4_block_set(ref->fs->bdev, &ref->block);
+}
+
+/* Compute the metadata_csum checksum of an inode: crc32c over the filesystem
+ * UUID, the inode number, the inode generation and the raw inode, with the
+ * stored checksum field zeroed for the duration of the calculation.
+ * Yields 0 when the filesystem does not use metadata_csum. */
+static u32int ext4_fs_inode_checksum(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	struct ext4_inode *inode = inode_ref->inode;
+	u16int inode_size = ext4_get16(sb, inode_size);
+	u32int le_ino, le_gen;
+	u32int saved, crc;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return 0;
+
+	le_ino = to_le32(inode_ref->index);
+	le_gen = to_le32(ext4_inode_get_generation(inode));
+
+	/* The checksum field itself must read as zero while hashing */
+	saved = ext4_inode_get_csum(sb, inode);
+	ext4_inode_set_csum(sb, inode, 0);
+
+	/* Seed with the UUID, then number and generation, then the inode */
+	crc = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	crc = ext4_crc32c(crc, &le_ino, sizeof(le_ino));
+	crc = ext4_crc32c(crc, &le_gen, sizeof(le_gen));
+	crc = ext4_crc32c(crc, inode, inode_size);
+
+	/* Put the stored value back */
+	ext4_inode_set_csum(sb, inode, saved);
+
+	/* Old-size inodes can hold only the low 16 bits of the checksum */
+	if (inode_size == EXT4_GOOD_OLD_INODE_SIZE)
+		crc &= 0xFFFF;
+
+	return crc;
+}
+
+/* Store a freshly computed checksum in the inode; no-op without metadata_csum. */
+static void ext4_fs_set_inode_checksum(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		ext4_inode_set_csum(sb, inode_ref->inode,
+				    ext4_fs_inode_checksum(inode_ref));
+}
+
+/* True when the stored inode checksum matches; always true without metadata_csum. */
+static bool ext4_fs_verify_inode_csum(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+	return ext4_fs_inode_checksum(inode_ref) ==
+	       ext4_inode_get_csum(sb, inode_ref->inode);
+}
+
+/* Load the inode table block containing inode `index` (1-based) and fill in
+ * `ref`.  When `initialized` is set the inode checksum is verified and a
+ * mismatch is logged (it is not treated as an error).  Returns 0 on success. */
+static int
+__ext4_fs_get_inode_ref(struct ext4_fs *fs, u32int index,
+			struct ext4_inode_ref *ref,
+			bool initialized)
+{
+	u32int inodes_per_group = ext4_get32(&fs->sb, inodes_per_group);
+
+	/* Inode numbers are 1-based; 0 would wrap around in the math below */
+	if (index == 0) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	/* Work with a 0-based index from here on */
+	index -= 1;
+	u32int block_group = index / inodes_per_group;
+	u32int offset_in_group = index % inodes_per_group;
+
+	/* Load block group, where i-node is located */
+	struct ext4_block_group_ref bg_ref;
+	int rc = ext4_fs_get_block_group_ref(fs, block_group, &bg_ref);
+	if (rc != 0)
+		return rc;
+
+	/* Load block address, where i-node table is located */
+	ext4_fsblk_t inode_table_start =
+	    ext4_bg_get_inode_table_first_block(bg_ref.block_group, &fs->sb);
+
+	/* Put back block group reference (not needed more) */
+	rc = ext4_fs_put_block_group_ref(&bg_ref);
+	if (rc != 0)
+		return rc;
+
+	/* Compute position of i-node in the block group */
+	u16int inode_size = ext4_get16(&fs->sb, inode_size);
+	u32int block_size = ext4_sb_get_block_size(&fs->sb);
+	u32int byte_offset_in_group = offset_in_group * inode_size;
+
+	/* Compute block address */
+	ext4_fsblk_t block_id =
+	    inode_table_start + (byte_offset_in_group / block_size);
+
+	rc = ext4_trans_block_get(fs->bdev, &ref->block, block_id);
+	if (rc != 0)
+		return rc;
+
+	/* Compute position of i-node in the data block */
+	u32int offset_in_block = byte_offset_in_group % block_size;
+	ref->inode = (struct ext4_inode *)(ref->block.data + offset_in_block);
+
+	/* We need to store the original value of index in the reference */
+	ref->index = index + 1;
+	ref->fs = fs;
+	ref->dirty = false;
+
+	if (initialized && !ext4_fs_verify_inode_csum(ref)) {
+		ext4_dbg(DEBUG_FS,
+			DBG_WARN "Inode checksum failed. Inode: %ud\n",
+			ref->index);
+	}
+
+	return 0;
+}
+
+int ext4_fs_get_inode_ref(struct ext4_fs *fs, u32int index,
+			  struct ext4_inode_ref *ref)
+{	/* initialized = true: the inode checksum is verified on load */
+	return __ext4_fs_get_inode_ref(fs, index, ref, true);
+}
+
+/* Release an inode reference.  A modified inode gets its checksum refreshed
+ * and its block marked dirty before the block is handed back. */
+int ext4_fs_put_inode_ref(struct ext4_inode_ref *ref)
+{
+	if (!ref->dirty)
+		return ext4_block_set(ref->fs->bdev, &ref->block);
+
+	ext4_fs_set_inode_checksum(ref);
+	ext4_trans_set_block_dirty(ref->block.buf);
+
+	return ext4_block_set(ref->fs->bdev, &ref->block);
+}
+
+/* Set up block mapping for an inode.  Only regular files and directories
+ * are handled; any other inode type is returned untouched.  If the
+ * filesystem has the extents feature the inode gets the EXTENTS flag and an
+ * initialized extent tree root.  The reference is then marked dirty. */
+void ext4_fs_inode_blocks_init(struct ext4_fs *fs,
+			       struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_inode *inode = inode_ref->inode;
+	u32int type = ext4_inode_type(&fs->sb, inode);
+
+	/* Nothing to do for anything but files and directories */
+	if (type != EXT4_INODE_MODE_FILE && type != EXT4_INODE_MODE_DIRECTORY)
+		return;
+
+	/* Initialize extents if needed */
+	if (ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) {
+		ext4_inode_set_flag(inode, EXT4_INODE_FLAG_EXTENTS);
+
+		/* Initialize extent root header */
+		ext4_extent_tree_init(inode_ref);
+	}
+
+	/* Files and directories are flagged dirty even without extents */
+	inode_ref->dirty = true;
+}
+
+/* Translate a directory-entry file type (EXT4_DE_*) into the corresponding
+ * inode mode type (EXT4_INODE_MODE_*).  Types without a mapping fall back to
+ * a regular file. */
+u32int ext4_fs_correspond_inode_mode(int filetype)
+{
+	/* FIXME: unsupported filetype */
+	u32int mode = EXT4_INODE_MODE_FILE;
+
+	switch (filetype) {
+	case EXT4_DE_DIR:	mode = EXT4_INODE_MODE_DIRECTORY; break;
+	case EXT4_DE_REG_FILE:	mode = EXT4_INODE_MODE_FILE; break;
+	case EXT4_DE_SYMLINK:	mode = EXT4_INODE_MODE_SOFTLINK; break;
+	case EXT4_DE_CHRDEV:	mode = EXT4_INODE_MODE_CHARDEV; break;
+	case EXT4_DE_BLKDEV:	mode = EXT4_INODE_MODE_BLOCKDEV; break;
+	case EXT4_DE_FIFO:	mode = EXT4_INODE_MODE_FIFO; break;
+	case EXT4_DE_SOCK:	mode = EXT4_INODE_MODE_SOCKET; break;
+	default:		break;
+	}
+
+	return mode;
+}
+
+/**@brief Allocate and initialize a new i-node.
+ *        The i-node is zero-filled, then given default permissions
+ *        (0777 for directories and symbolic links, 0666 otherwise)
+ *        combined with the type bits matching filetype.
+ * @param fs        Filesystem to allocate the i-node in
+ * @param inode_ref Receives the reference to the new i-node (marked dirty)
+ * @param filetype  Directory entry type (EXT4_DE_*) of the new object
+ * @return 0 on success, otherwise the allocator / loader error
+ */
+int ext4_fs_alloc_inode(struct ext4_fs *fs, struct ext4_inode_ref *inode_ref,
+			int filetype)
+{
+	u16int inode_size = ext4_get16(&fs->sb, inode_size);
+	bool is_dir = (filetype == EXT4_DE_DIR);
+	struct ext4_inode *inode;
+	u32int index;
+	u32int mode;
+	int rc;
+
+	/* Allocate inode by allocation algorithm */
+	rc = ext4_ialloc_alloc_inode(fs, &index, is_dir);
+	if (rc != 0)
+		return rc;
+
+	/*
+	 * Load i-node from on-disk i-node table.  Checksum verification is
+	 * switched off (last argument): the slot is about to be overwritten.
+	 */
+	rc = __ext4_fs_get_inode_ref(fs, index, inode_ref, false);
+	if (rc != 0) {
+		/* Give the i-node number back to the allocator */
+		ext4_ialloc_free_inode(fs, index, is_dir);
+		return rc;
+	}
+
+	/* Start from an all-zero i-node */
+	inode = inode_ref->inode;
+	memset(inode, 0, inode_size);
+
+	/*
+	 * Default permissions, chosen to be compatible with other systems:
+	 *   directories and symbolic links  0777 (rwxrwxrwx)
+	 *   everything else                 0666 (rw-rw-rw-)
+	 */
+	if (is_dir)
+		mode = 0777 | EXT4_INODE_MODE_DIRECTORY;
+	else if (filetype == EXT4_DE_SYMLINK)
+		mode = 0777 | EXT4_INODE_MODE_SOFTLINK;
+	else
+		mode = 0666 | ext4_fs_correspond_inode_mode(filetype);
+	ext4_inode_set_mode(&fs->sb, inode, mode);
+
+	/* Explicitly reset the individual fields as well */
+	ext4_inode_set_links_cnt(inode, 0);
+	ext4_inode_set_uid(inode, 0);
+	ext4_inode_set_gid(inode, 0);
+	ext4_inode_set_size(inode, 0);
+	ext4_inode_set_access_time(inode, 0);
+	ext4_inode_set_change_inode_time(inode, 0);
+	ext4_inode_set_modif_time(inode, 0);
+	ext4_inode_set_del_time(inode, 0);
+	ext4_inode_set_blocks_count(&fs->sb, inode, 0);
+	ext4_inode_set_flags(inode, 0);
+	ext4_inode_set_generation(inode, 0);
+
+	/* Bigger than EXT4_GOOD_OLD_INODE_SIZE: copy want_extra_isize over */
+	if (inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+		u16int size = ext4_get16(&fs->sb, want_extra_isize);
+		ext4_inode_set_extra_isize(&fs->sb, inode, size);
+	}
+
+	memset(inode->blocks, 0, sizeof(inode->blocks));
+
+	inode_ref->dirty = true;
+
+	return 0;
+}
+
+int ext4_fs_free_inode(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_fs *fs = inode_ref->fs;
+	u32int offset;
+	u32int suboff;
+	int rc;
+	/* Frees the inode's indirect and xattr blocks, then the inode itself.
+	 * Extent-mapped inodes skip the indirect-block part entirely. */
+	if ((ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) &&
+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {
+		/* Data structures are released during truncate operation... */
+		goto finish;
+	}
+
+	/* Release all indirect (no data) blocks */
+
+	/* 1) Single indirect */
+	ext4_fsblk_t fblock = ext4_inode_get_indirect_block(inode_ref->inode, 0);
+	if (fblock != 0) {
+		int rc = ext4_balloc_free_block(inode_ref, fblock);
+		if (rc != 0)
+			return rc;
+
+		ext4_inode_set_indirect_block(inode_ref->inode, 0, 0);
+	}
+
+	u32int block_size = ext4_sb_get_block_size(&fs->sb);
+	u32int count = block_size / sizeof(u32int);
+
+	struct ext4_block block;
+
+	/* 2) Double indirect */
+	fblock = ext4_inode_get_indirect_block(inode_ref->inode, 1);
+	if (fblock != 0) {
+		int rc = ext4_trans_block_get(fs->bdev, &block, fblock);
+		if (rc != 0)
+			return rc;
+
+		ext4_fsblk_t ind_block;
+		for (offset = 0; offset < count; ++offset) {
+			ind_block = to_le32(((u32int *)block.data)[offset]);
+
+			if (ind_block == 0)
+				continue;
+			rc = ext4_balloc_free_block(inode_ref, ind_block);
+			if (rc != 0) {
+				ext4_block_set(fs->bdev, &block);
+				return rc;
+			}
+
+		}
+
+		ext4_block_set(fs->bdev, &block);
+		rc = ext4_balloc_free_block(inode_ref, fblock);
+		if (rc != 0)
+			return rc;
+
+		ext4_inode_set_indirect_block(inode_ref->inode, 1, 0);
+	}
+
+	/* 3) Triple indirect */
+	struct ext4_block subblock;
+	fblock = ext4_inode_get_indirect_block(inode_ref->inode, 2);
+	if (fblock == 0)
+		goto finish;
+	rc = ext4_trans_block_get(fs->bdev, &block, fblock);
+	if (rc != 0)
+		return rc;
+
+	ext4_fsblk_t ind_block;
+	for (offset = 0; offset < count; ++offset) {
+		ind_block = to_le32(((u32int *)block.data)[offset]);
+
+		if (ind_block == 0)
+			continue;
+		rc = ext4_trans_block_get(fs->bdev, &subblock,
+				ind_block);
+		if (rc != 0) {
+			ext4_block_set(fs->bdev, &block);
+			return rc;
+		}
+
+		ext4_fsblk_t ind_subblk;
+		for (suboff = 0; suboff < count; ++suboff) {
+			ind_subblk = to_le32(((u32int *)subblock.data)[suboff]);
+
+			if (ind_subblk == 0)
+				continue;
+			rc = ext4_balloc_free_block(inode_ref, ind_subblk);
+			if (rc != 0) {
+				ext4_block_set(fs->bdev, &subblock);
+				ext4_block_set(fs->bdev, &block);
+				return rc;
+			}
+
+		}
+
+		ext4_block_set(fs->bdev, &subblock);
+
+		rc = ext4_balloc_free_block(inode_ref,
+				ind_block);
+		if (rc != 0) {
+			ext4_block_set(fs->bdev, &block);
+			return rc;
+		}
+
+	}
+
+	ext4_block_set(fs->bdev, &block);
+	rc = ext4_balloc_free_block(inode_ref, fblock);
+	if (rc != 0)
+		return rc;
+
+	ext4_inode_set_indirect_block(inode_ref->inode, 2, 0);
+finish:
+	/* Mark inode dirty for writing to the physical device */
+	inode_ref->dirty = true;
+
+	/* Free block with extended attributes if present */
+	ext4_fsblk_t xattr_block =
+	    ext4_inode_get_file_acl(inode_ref->inode, &fs->sb);
+	if (xattr_block) {
+		int rc = ext4_balloc_free_block(inode_ref, xattr_block);
+		if (rc != 0)
+			return rc;
+
+		ext4_inode_set_file_acl(inode_ref->inode, &fs->sb, 0);
+	}
+
+	/* Free inode by allocator; it is told whether this was a directory */
+	if (ext4_inode_is_type(&fs->sb, inode_ref->inode,
+			       EXT4_INODE_MODE_DIRECTORY))
+		rc = ext4_ialloc_free_inode(fs, inode_ref->index, true);
+	else
+		rc = ext4_ialloc_free_inode(fs, inode_ref->index, false);
+
+	return rc;
+}
+
+
+/**@brief Release data block from i-node (indirect-mapped i-nodes only)
+ * @param inode_ref I-node to release block from
+ * @param iblock    Logical block to be released
+ * @return 0 on success or if iblock is not mapped (sparse), else an error
+ */
+static int ext4_fs_release_inode_block(struct ext4_inode_ref *inode_ref,
+				ext4_lblk_t iblock)
+{
+	ext4_fsblk_t fblock;
+
+	struct ext4_fs *fs = inode_ref->fs;
+
+	/* Extent-mapped i-nodes are not supported by this function; the
+	 * assertion below rejects them */
+	assert(!(
+	    ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS) &&
+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))));
+
+	struct ext4_inode *inode = inode_ref->inode;
+
+	/* Handle simple case when we are dealing with direct reference */
+	if (iblock < EXT4_INODE_DIRECT_BLOCK_COUNT) {
+		fblock = ext4_inode_get_direct_block(inode, iblock);
+
+		/* Sparse file */
+		if (fblock == 0)
+			return 0;
+
+		ext4_inode_set_direct_block(inode, iblock, 0);
+		return ext4_balloc_free_block(inode_ref, fblock);
+	}
+
+	/* Determine the indirection level needed to get the desired block */
+	unsigned int level = 0;
+	unsigned int i;
+	for (i = 1; i < 4; i++) {
+		if (iblock < fs->inode_block_limits[i]) {
+			level = i;
+			break;
+		}
+	}
+
+	if (level == 0) {
+		werrstr(Eio);
+		return -1;
+	}
+
+	/* Compute offsets for the topmost level */
+	u32int block_offset_in_level =
+		(u32int)(iblock - fs->inode_block_limits[level - 1]);
+	ext4_fsblk_t current_block =
+	    ext4_inode_get_indirect_block(inode, level - 1);
+	u32int offset_in_block =
+	    (u32int)(block_offset_in_level / fs->inode_blocks_per_level[level - 1]);
+
+	/*
+	 * Navigate through other levels, until we find the block number
+	 * or find null reference meaning we are dealing with sparse file
+	 */
+	struct ext4_block block;
+
+	while (level > 0) {
+
+		/* Sparse check */
+		if (current_block == 0)
+			return 0;
+
+		int rc = ext4_trans_block_get(fs->bdev, &block, current_block);
+		if (rc != 0)
+			return rc;
+
+		current_block =
+		    to_le32(((u32int *)block.data)[offset_in_block]);
+
+		/* Level 1 holds the data block address: clear that slot */
+		if (level == 1) {
+			((u32int *)block.data)[offset_in_block] = to_le32(0);
+			ext4_trans_set_block_dirty(block.buf);
+		}
+
+		rc = ext4_block_set(fs->bdev, &block);
+		if (rc != 0)
+			return rc;
+
+		level--;
+
+		/*
+		 * If we are on the last level, break here as
+		 * there is no next level to visit
+		 */
+		if (level == 0)
+			break;
+
+		/* Visit the next level */
+		block_offset_in_level %= fs->inode_blocks_per_level[level];
+		offset_in_block = (u32int)(block_offset_in_level /
+				  fs->inode_blocks_per_level[level - 1]);
+	}
+
+	fblock = current_block;
+	if (fblock == 0)
+		return 0;
+
+	/* Physical block is not referenced, it can be released */
+	return ext4_balloc_free_block(inode_ref, fblock);
+}
+
+/**@brief Shrink an i-node to new_size, releasing the data blocks past it.
+ * @param inode_ref I-node to truncate
+ * @param new_size  New size in bytes; must not exceed the current size
+ * @return 0 on success; -1 with Einval set through werrstr() when the
+ *         i-node cannot be truncated or new_size is larger than the
+ *         current size; otherwise the error of the failing release call
+ */
+int ext4_fs_truncate_inode(struct ext4_inode_ref *inode_ref, u64int new_size)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int i;
+	int r;
+	bool v;
+
+	/* Check flags, if i-node can be truncated */
+	if (!ext4_inode_can_truncate(sb, inode_ref->inode)) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	/* If sizes are equal, nothing has to be done. */
+	u64int old_size = ext4_inode_get_size(sb, inode_ref->inode);
+	if (old_size == new_size)
+		return 0;
+
+	/* It's not supported to make the larger file by truncate operation */
+	if (old_size < new_size) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	/*
+	 * Symbolic link that is small enough and has no blocks allocated:
+	 * the bytes from new_size to the end of inode->blocks are zeroed
+	 * and only the size is updated.
+	 */
+	v = ext4_inode_is_type(sb, inode_ref->inode, EXT4_INODE_MODE_SOFTLINK);
+	if (v && old_size < sizeof(inode_ref->inode->blocks) &&
+	    !ext4_inode_get_blocks_count(sb, inode_ref->inode)) {
+		char *content = (char *)inode_ref->inode->blocks + new_size;
+		memset(content, 0,
+		       sizeof(inode_ref->inode->blocks) - (u32int)new_size);
+		ext4_inode_set_size(inode_ref->inode, new_size);
+		inode_ref->dirty = true;
+
+		return 0;
+	}
+
+	/*
+	 * Device nodes and sockets: blocks[0] and blocks[1] are cleared and
+	 * the i-node is marked dirty; the size field is not touched here.
+	 */
+	i = ext4_inode_type(sb, inode_ref->inode);
+	if (i == EXT4_INODE_MODE_CHARDEV ||
+	    i == EXT4_INODE_MODE_BLOCKDEV ||
+	    i == EXT4_INODE_MODE_SOCKET) {
+		inode_ref->inode->blocks[0] = 0;
+		inode_ref->inode->blocks[1] = 0;
+
+		inode_ref->dirty = true;
+		return 0;
+	}
+
+	/* Compute how many blocks will be released (sizes rounded up) */
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u32int new_blocks_cnt = (u32int)((new_size + block_size - 1) / block_size);
+	u32int old_blocks_cnt = (u32int)((old_size + block_size - 1) / block_size);
+	u32int diff_blocks_cnt = old_blocks_cnt - new_blocks_cnt;
+
+	if ((ext4_sb_feature_incom(sb, EXT4_FINCOM_EXTENTS)) &&
+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {
+
+		/* Extents require special operation */
+		if (diff_blocks_cnt) {
+			r = ext4_extent_remove_space(inode_ref, new_blocks_cnt,
+						     EXT4_EXTENT_MAX_BLOCKS);
+			if (r != 0)
+				return r;
+
+		}
+	} else {
+		/* Release data blocks from the end of file */
+
+		/* Logical blocks are numbered from 0, so the first block to
+		 * release is new_blocks_cnt itself.
+		 */
+		for (i = 0; i < diff_blocks_cnt; ++i) {
+			r = ext4_fs_release_inode_block(inode_ref,
+							new_blocks_cnt + i);
+			if (r != 0)
+				return r;
+		}
+	}
+
+	/* Update i-node */
+	ext4_inode_set_size(inode_ref->inode, new_size);
+	inode_ref->dirty = true;
+
+	return 0;
+}
+
+/**@brief Compute the allocation 'goal' derived from the i-node number.
+ * @param inode_ref Reference to inode, to allocate block for
+ * @return (index - 1) / inodes_per_group, i.e. the zero-based position of
+ *         the i-node's group when i-nodes are split by inodes_per_group
+ */
+ext4_fsblk_t ext4_fs_inode_to_goal_block(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int per_group;
+
+	/* I-node numbers start at 1, hence the "- 1" */
+	per_group = ext4_get32(sb, inodes_per_group);
+
+	return (inode_ref->index - 1) / per_group;
+}
+
+/**@brief Compute 'goal' for allocation algorithm (For blockmap).
+ *
+ * If the last logical block of the file is mapped, the goal is the
+ * physical block right after it.  Otherwise (empty or sparse tail) the
+ * goal is the first block after the inode table of the i-node's group.
+ *
+ * @param inode_ref Reference to inode, to allocate block for
+ * @param goal      Output: preferred physical block (set to 0 on entry)
+ * @return 0 on success, otherwise the error of the failing helper
+ */
+int ext4_fs_indirect_find_goal(struct ext4_inode_ref *inode_ref,
+			       ext4_fsblk_t *goal)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	struct ext4_block_group_ref bg_ref;
+	ext4_fsblk_t table_start, table_blocks;
+	u64int fsize;
+	u32int bsize, nblocks;
+	u32int per_group, group, ngroups;
+	u32int total_inodes, group_inodes, table_bytes;
+	u16int isize;
+	int rc;
+
+	*goal = 0;
+
+	/* Number of logical blocks covered by the file size, rounded up */
+	fsize = ext4_inode_get_size(sb, inode_ref->inode);
+	bsize = ext4_sb_get_block_size(sb);
+	nblocks = (u32int)(fsize / bsize);
+	if (fsize % bsize != 0)
+		nblocks++;
+
+	/* Prefer the block following the file's last mapped block */
+	if (nblocks > 0) {
+		rc = ext4_fs_get_inode_dblk_idx(inode_ref, nblocks - 1,
+						goal, false);
+		if (rc != 0)
+			return rc;
+
+		if (*goal != 0) {
+			*goal += 1;
+			return 0;
+		}
+
+		/* Last block is a hole: fall back to the group-based goal */
+	}
+
+	/* Block group the i-node belongs to */
+	per_group = ext4_get32(sb, inodes_per_group);
+	group = (inode_ref->index - 1) / per_group;
+
+	rc = ext4_fs_get_block_group_ref(inode_ref->fs, group, &bg_ref);
+	if (rc != 0)
+		return rc;
+
+	ngroups = ext4_block_group_cnt(sb);
+	table_start = ext4_bg_get_inode_table_first_block(bg_ref.block_group,
+							  sb);
+	isize = ext4_get16(sb, inode_size);
+
+	/* The last block group may hold fewer i-nodes than the others */
+	if (group < ngroups - 1) {
+		group_inodes = per_group;
+	} else {
+		total_inodes = ext4_get32(sb, inodes_count);
+		group_inodes = total_inodes - ((ngroups - 1) * per_group);
+	}
+	table_bytes = group_inodes * isize;
+
+	/* Size of the inode table in blocks, rounded up */
+	table_blocks = table_bytes / bsize;
+	if (table_bytes % bsize)
+		table_blocks++;
+
+	*goal = table_start + table_blocks;
+
+	return ext4_fs_put_block_group_ref(&bg_ref);
+}
+
+/**@brief Translate a logical block of an i-node to its physical block.
+ * @param inode_ref         I-node to look the block up in
+ * @param iblock            Logical block index
+ * @param fblock            Output: physical block address, 0 when the
+ *                          file is empty or the block is not mapped
+ * @param extent_create     Passed through to ext4_extent_get_blocks() for
+ *                          extent-mapped i-nodes
+ * @param support_unwritten Not consulted by this implementation
+ * @return 0 on success (including the unmapped case); -1 with Eio set
+ *         through werrstr() when iblock is beyond the block-map limits;
+ *         otherwise the error of the failing helper
+ */
+static int ext4_fs_get_inode_dblk_idx_internal(struct ext4_inode_ref *inode_ref,
+				       ext4_lblk_t iblock, ext4_fsblk_t *fblock,
+				       bool extent_create,
+				       bool support_unwritten)
+{
+	struct ext4_fs *fs = inode_ref->fs;
+
+	/* For empty file is situation simple */
+	if (ext4_inode_get_size(&fs->sb, inode_ref->inode) == 0) {
+		*fblock = 0;
+		return 0;
+	}
+
+	ext4_fsblk_t current_block;
+
+	USED(extent_create);
+	USED(support_unwritten);
+
+	/* Handle i-node using extents */
+	if ((ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) &&
+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {
+
+		ext4_fsblk_t current_fsblk;
+		int rc = ext4_extent_get_blocks(inode_ref, iblock, 1,
+				&current_fsblk, extent_create, nil);
+		if (rc != 0)
+			return rc;
+
+		current_block = current_fsblk;
+		*fblock = current_block;
+
+		return 0;
+	}
+
+	struct ext4_inode *inode = inode_ref->inode;
+
+	/* Direct block are read directly from array in i-node structure */
+	if (iblock < EXT4_INODE_DIRECT_BLOCK_COUNT) {
+		current_block =
+		    ext4_inode_get_direct_block(inode, (u32int)iblock);
+		*fblock = current_block;
+		return 0;
+	}
+
+	/*
+	 * Determine indirection level of the target block: the first level
+	 * (1..3) whose limit is above iblock.
+	 */
+	unsigned int l = 0;
+	unsigned int i;
+	for (i = 1; i < 4; i++) {
+		if (iblock < fs->inode_block_limits[i]) {
+			l = i;
+			break;
+		}
+	}
+
+	/* iblock is beyond every limit in inode_block_limits[1..3] */
+	if (l == 0) {
+		werrstr(Eio);
+		return -1;
+	}
+
+	/* Compute offsets for the topmost level */
+	u32int blk_off_in_lvl = (u32int)(iblock - fs->inode_block_limits[l - 1]);
+	current_block = ext4_inode_get_indirect_block(inode, l - 1);
+	u32int off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);
+
+	/* Sparse file */
+	if (current_block == 0) {
+		*fblock = 0;
+		return 0;
+	}
+
+	struct ext4_block block;
+
+	/*
+	 * Navigate through other levels, until we find the block number
+	 * or find null reference meaning we are dealing with sparse file
+	 */
+	while (l > 0) {
+		/* Load indirect block */
+		int rc = ext4_trans_block_get(fs->bdev, &block, current_block);
+		if (rc != 0)
+			return rc;
+
+		/* Read block address from indirect block */
+		current_block =
+		    to_le32(((u32int *)block.data)[off_in_blk]);
+
+		/* Put back indirect block untouched */
+		rc = ext4_block_set(fs->bdev, &block);
+		if (rc != 0)
+			return rc;
+
+		/* Check for sparse file */
+		if (current_block == 0) {
+			*fblock = 0;
+			return 0;
+		}
+
+		/* Jump to the next level */
+		l--;
+
+		/* Termination condition - we have address of data block loaded
+		 */
+		if (l == 0)
+			break;
+
+		/* Visit the next level */
+		blk_off_in_lvl %= fs->inode_blocks_per_level[l];
+		off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);
+	}
+
+	*fblock = current_block;
+
+	return 0;
+}
+
+
+/**@brief Look up the physical block of a logical block (extent_create is
+ *        passed as false to the internal lookup).
+ * @param inode_ref         I-node to look the block up in
+ * @param iblock            Logical block index
+ * @param fblock            Output: physical block address, 0 if unmapped
+ * @param support_unwritten Forwarded unchanged to the internal lookup
+ * @return Result of ext4_fs_get_inode_dblk_idx_internal()
+ */
+int ext4_fs_get_inode_dblk_idx(struct ext4_inode_ref *inode_ref,
+			       ext4_lblk_t iblock, ext4_fsblk_t *fblock,
+			       bool support_unwritten)
+{
+	return ext4_fs_get_inode_dblk_idx_internal(inode_ref, iblock, fblock,
+						   false, support_unwritten);
+}
+
+/**@brief Look up the physical block of a logical block with extent_create
+ *        and support_unwritten both passed as true to the internal lookup.
+ * @param inode_ref I-node to look the block up in
+ * @param iblock    Logical block index
+ * @param fblock    Output: physical block address
+ * @return Result of ext4_fs_get_inode_dblk_idx_internal()
+ */
+int ext4_fs_init_inode_dblk_idx(struct ext4_inode_ref *inode_ref,
+				ext4_lblk_t iblock, ext4_fsblk_t *fblock)
+{
+	return ext4_fs_get_inode_dblk_idx_internal(inode_ref, iblock, fblock,
+						   true, true);
+}
+
+/**@brief Record the physical block backing a logical block in the
+ *        direct/indirect block map, allocating indirect blocks on the way.
+ * @param inode_ref I-node whose block map is updated (must not use extents)
+ * @param iblock    Logical block index
+ * @param fblock    Physical block address to store
+ * @return 0 on success; -1 with an error string set through werrstr() for
+ *         extent-mapped i-nodes or when iblock is beyond the block-map
+ *         limits; otherwise the error of the failing helper
+ */
+static int ext4_fs_set_inode_data_block_index(struct ext4_inode_ref *inode_ref,
+				       ext4_lblk_t iblock, ext4_fsblk_t fblock)
+{
+	struct ext4_fs *fs = inode_ref->fs;
+
+	/* Handle inode using extents */
+	if ((ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) &&
+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {
+		/* Not reachable */
+		werrstr("impossible feature combination in extents");
+		return -1;
+	}
+
+	/* Handle simple case when we are dealing with direct reference */
+	if (iblock < EXT4_INODE_DIRECT_BLOCK_COUNT) {
+		ext4_inode_set_direct_block(inode_ref->inode, (u32int)iblock,
+					    (u32int)fblock);
+		inode_ref->dirty = true;
+
+		return 0;
+	}
+
+	/*
+	 * Determine the indirection level needed to get the desired block:
+	 * the first level (1..3) whose limit is above iblock.
+	 */
+	unsigned int l = 0;
+	unsigned int i;
+	for (i = 1; i < 4; i++) {
+		if (iblock < fs->inode_block_limits[i]) {
+			l = i;
+			break;
+		}
+	}
+
+	/* iblock is beyond every limit in inode_block_limits[1..3] */
+	if (l == 0) {
+		werrstr(Eio);
+		return -1;
+	}
+
+	u32int block_size = ext4_sb_get_block_size(&fs->sb);
+
+	/* Compute offsets for the topmost level */
+	u32int blk_off_in_lvl = (u32int)(iblock - fs->inode_block_limits[l - 1]);
+	ext4_fsblk_t current_block =
+			ext4_inode_get_indirect_block(inode_ref->inode, l - 1);
+	u32int off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);
+
+	ext4_fsblk_t new_blk;
+
+	struct ext4_block block;
+	struct ext4_block new_block;
+
+	/* Is needed to allocate indirect block on the i-node level */
+	if (current_block == 0) {
+		/* Allocate new indirect block */
+		ext4_fsblk_t goal;
+		int rc = ext4_fs_indirect_find_goal(inode_ref, &goal);
+		if (rc != 0)
+			return rc;
+
+		rc = ext4_balloc_alloc_block(inode_ref, goal, &new_blk);
+		if (rc != 0)
+			return rc;
+
+		/* Load newly allocated block */
+		rc = ext4_trans_block_get_noread(fs->bdev, &new_block, new_blk);
+		if (rc != 0) {
+			ext4_balloc_free_block(inode_ref, new_blk);
+			return rc;
+		}
+
+		/* Initialize new block */
+		memset(new_block.data, 0, block_size);
+		ext4_trans_set_block_dirty(new_block.buf);
+
+		/* Put back the allocated block */
+		rc = ext4_block_set(fs->bdev, &new_block);
+		if (rc != 0)
+			return rc;
+
+		/*
+		 * Update i-node only now that the block has been zeroed and
+		 * handed back: on any failure above the i-node never refers
+		 * to a block that was freed or left uninitialized.
+		 */
+		ext4_inode_set_indirect_block(inode_ref->inode, l - 1,
+				(u32int)new_blk);
+		inode_ref->dirty = true;
+
+		current_block = new_blk;
+	}
+
+	/*
+	 * Navigate through other levels, allocating missing intermediate
+	 * indirect blocks, until the level-1 block is reached and the data
+	 * block address can be stored in it.
+	 */
+	while (l > 0) {
+		int rc = ext4_trans_block_get(fs->bdev, &block, current_block);
+		if (rc != 0)
+			return rc;
+
+		current_block = to_le32(((u32int *)block.data)[off_in_blk]);
+		if ((l > 1) && (current_block == 0)) {
+			ext4_fsblk_t goal;
+			rc = ext4_fs_indirect_find_goal(inode_ref, &goal);
+			if (rc != 0) {
+				ext4_block_set(fs->bdev, &block);
+				return rc;
+			}
+
+			/* Allocate new block */
+			rc =
+			    ext4_balloc_alloc_block(inode_ref, goal, &new_blk);
+			if (rc != 0) {
+				ext4_block_set(fs->bdev, &block);
+				return rc;
+			}
+
+			/* Load newly allocated block */
+			rc = ext4_trans_block_get_noread(fs->bdev, &new_block,
+					    new_blk);
+
+			if (rc != 0) {
+				/*
+				 * Nothing refers to new_blk yet, so give it
+				 * back instead of leaking it (same handling
+				 * as for the i-node level block above).
+				 */
+				ext4_balloc_free_block(inode_ref, new_blk);
+				ext4_block_set(fs->bdev, &block);
+				return rc;
+			}
+
+			/* Initialize allocated block */
+			memset(new_block.data, 0, block_size);
+			ext4_trans_set_block_dirty(new_block.buf);
+
+			rc = ext4_block_set(fs->bdev, &new_block);
+			if (rc != 0) {
+				ext4_block_set(fs->bdev, &block);
+				return rc;
+			}
+
+			/* Write block address to the parent */
+			u32int * p = (u32int * )block.data;
+			p[off_in_blk] = to_le32((u32int)new_blk);
+			ext4_trans_set_block_dirty(block.buf);
+			current_block = new_blk;
+		}
+
+		/* Will be finished, write the fblock address */
+		if (l == 1) {
+			u32int * p = (u32int * )block.data;
+			p[off_in_blk] = to_le32((u32int)fblock);
+			ext4_trans_set_block_dirty(block.buf);
+		}
+
+		rc = ext4_block_set(fs->bdev, &block);
+		if (rc != 0)
+			return rc;
+
+		l--;
+
+		/*
+		 * If we are on the last level, break here as
+		 * there is no next level to visit
+		 */
+		if (l == 0)
+			break;
+
+		/* Visit the next level */
+		blk_off_in_lvl %= fs->inode_blocks_per_level[l];
+		off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);
+	}
+
+	return 0;
+}
+
+
+/**@brief Append one data block to the end of an i-node and grow its size
+ *        by one block.
+ * @param inode_ref I-node to extend
+ * @param fblock    Output: physical address of the new block
+ * @param iblock    Output: logical index of the new block
+ * @return 0 on success, otherwise the error of the failing helper
+ */
+int ext4_fs_append_inode_dblk(struct ext4_inode_ref *inode_ref,
+			      ext4_fsblk_t *fblock, ext4_lblk_t *iblock)
+{
+	/* Handle extents separately */
+	if ((ext4_sb_feature_incom(&inode_ref->fs->sb, EXT4_FINCOM_EXTENTS)) &&
+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {
+		int rc;
+		ext4_fsblk_t current_fsblk;
+		struct ext4_sblock *sb = &inode_ref->fs->sb;
+		u64int inode_size = ext4_inode_get_size(sb, inode_ref->inode);
+		u32int block_size = ext4_sb_get_block_size(sb);
+		/* First logical block past the current size (rounded up) */
+		*iblock = (u32int)((inode_size + block_size - 1) / block_size);
+
+		/* The "true" argument is the create flag of the extent lookup */
+		rc = ext4_extent_get_blocks(inode_ref, *iblock, 1,
+						&current_fsblk, true, nil);
+		if (rc != 0)
+			return rc;
+
+		*fblock = current_fsblk;
+		assert(*fblock);
+
+		/*
+		 * NOTE(review): unlike the block-map path below, inode_size
+		 * is not aligned to block_size before adding one block -
+		 * confirm callers only pass block-aligned sizes here.
+		 */
+		ext4_inode_set_size(inode_ref->inode, inode_size + block_size);
+		inode_ref->dirty = true;
+
+
+		return rc;
+	}
+
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+
+	/* Compute next block index and allocate data block */
+	u64int inode_size = ext4_inode_get_size(sb, inode_ref->inode);
+	u32int block_size = ext4_sb_get_block_size(sb);
+
+	/* Round the i-node size up to a multiple of the block size */
+	if ((inode_size % block_size) != 0)
+		inode_size += block_size - (inode_size % block_size);
+
+	/* Logical blocks are numbered from 0 */
+	u32int new_block_idx = (u32int)(inode_size / block_size);
+
+	/* Allocate new physical block */
+	ext4_fsblk_t goal, phys_block;
+	int rc = ext4_fs_indirect_find_goal(inode_ref, &goal);
+	if (rc != 0)
+		return rc;
+
+	rc = ext4_balloc_alloc_block(inode_ref, goal, &phys_block);
+	if (rc != 0)
+		return rc;
+
+	/* Add physical block address to the i-node */
+	rc = ext4_fs_set_inode_data_block_index(inode_ref, new_block_idx,
+						phys_block);
+	if (rc != 0) {
+		/* The block could not be linked in: give it back */
+		ext4_balloc_free_block(inode_ref, phys_block);
+		return rc;
+	}
+
+	/* Update i-node */
+	ext4_inode_set_size(inode_ref->inode, inode_size + block_size);
+	inode_ref->dirty = true;
+
+	*fblock = phys_block;
+	*iblock = new_block_idx;
+
+	return 0;
+}
+
+/**@brief Increment the link count of an i-node.
+ *
+ * For an indexed directory (DIR_INDEX feature and INDEX flag both set)
+ * whose new count is 2 or has reached EXT4_LINK_MAX, the count is reset
+ * to 1 and the DIR_NLINK read-only compatible feature is set in the
+ * superblock.
+ *
+ * @param inode_ref I-node to update
+ */
+void ext4_fs_inode_links_count_inc(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	struct ext4_inode *inode = inode_ref->inode;
+	u16int nlinks;
+	u32int ro_features;
+
+	nlinks = ext4_inode_get_links_cnt(inode);
+	nlinks++;
+	ext4_inode_set_links_cnt(inode, nlinks);
+
+	/* Everything below applies to indexed directories only */
+	if (!ext4_sb_feature_com(sb, EXT4_FCOM_DIR_INDEX))
+		return;
+	if (!ext4_inode_has_flag(inode, EXT4_INODE_FLAG_INDEX))
+		return;
+
+	if (nlinks <= 1)
+		return;
+	if (nlinks < EXT4_LINK_MAX && nlinks != 2)
+		return;
+
+	ext4_inode_set_links_cnt(inode, 1);
+
+	ro_features = ext4_get32(sb, features_read_only);
+	ro_features |= EXT4_FRO_COM_DIR_NLINK;
+	ext4_set32(sb, features_read_only, ro_features);
+}
+
+/**@brief Decrement the link count of an i-node.
+ *
+ * A directory is only decremented while its count is above 2; any other
+ * i-node is decremented while its count is above 0.
+ *
+ * @param inode_ref I-node to update
+ */
+void ext4_fs_inode_links_count_dec(struct ext4_inode_ref *inode_ref)
+{
+	u16int nlinks, floor;
+
+	nlinks = ext4_inode_get_links_cnt(inode_ref->inode);
+
+	if (ext4_inode_is_type(&inode_ref->fs->sb, inode_ref->inode,
+			       EXT4_INODE_MODE_DIRECTORY))
+		floor = 2;
+	else
+		floor = 0;
+
+	if (nlinks > floor)
+		ext4_inode_set_links_cnt(inode_ref->inode, nlinks - 1);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_hash.c
@@ -1,0 +1,313 @@
+/*
+ * Copyright (c) 2013 Grzegorz Kostka ([email protected])
+ *
+ * FreeBSD:
+ * Copyright (c) 2010, 2013 Zheng Liu <[email protected]>
+ * Copyright (c) 2012, Vyacheslav Matyushin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * The following notice applies to the code in ext2_half_md4():
+ *
+ * Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved.
+ *
+ * License to copy and use this software is granted provided that it
+ * is identified as the "RSA Data Security, Inc. MD4 Message-Digest
+ * Algorithm" in all material mentioning or referencing this software
+ * or this function.
+ *
+ * License is also granted to make and use derivative works provided
+ * that such works are identified as "derived from the RSA Data
+ * Security, Inc. MD4 Message-Digest Algorithm" in all material
+ * mentioning or referencing the derived work.
+ *
+ * RSA Data Security, Inc. makes no representations concerning either
+ * the merchantability of this software or the suitability of this
+ * software for any particular purpose. It is provided "as is"
+ * without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this
+ * documentation and/or software.
+ */
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+
+/* F, G, and H are MD4 functions */
+#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
+#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+
+/* ROTATE_LEFT rotates x left n bits */
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+
+/*
+ * FF, GG, and HH are transformations for rounds 1, 2, and 3.
+ * Rotation is separated from addition to prevent recomputation.
+ */
+#define FF(a, b, c, d, x, s)                                                   \
+	{                                                                      \
+		(a) += F((b), (c), (d)) + (x);                                 \
+		(a) = ROTATE_LEFT((a), (s));                                   \
+	\
+}
+
+#define GG(a, b, c, d, x, s)                                                   \
+	{                                                                      \
+		(a) += G((b), (c), (d)) + (x) + (u32int)0x5A827999;          \
+		(a) = ROTATE_LEFT((a), (s));                                   \
+	\
+}
+
+#define HH(a, b, c, d, x, s)                                                   \
+	{                                                                      \
+		(a) += H((b), (c), (d)) + (x) + (u32int)0x6ED9EBA1;          \
+		(a) = ROTATE_LEFT((a), (s));                                   \
+	\
+}
+
+/*
+ * MD4 basic transformation, reduced to three rounds of eight steps
+ * ("half MD4"), as used by Linux for the directory index.  It mixes the
+ * eight words of data into the four-word state in hash.  This function is
+ * derived from the RSA Data Security, Inc. MD4 Message-Digest Algorithm
+ * and was modified as necessary.
+ *
+ * The Linux counterpart returns a u32int; that value is not needed here,
+ * so this version returns nothing and only updates hash in place.
+ */
+static void ext2_half_md4(u32int hash[4], u32int data[8])
+{
+	u32int a, b, c, d;
+	int k;
+
+	a = hash[0];
+	b = hash[1];
+	c = hash[2];
+	d = hash[3];
+
+	/* Round 1: words 0,1,2,3 then 4,5,6,7 */
+	for (k = 0; k < 8; k += 4) {
+		FF(a, b, c, d, data[k], 3);
+		FF(d, a, b, c, data[k + 1], 7);
+		FF(c, d, a, b, data[k + 2], 11);
+		FF(b, c, d, a, data[k + 3], 19);
+	}
+
+	/* Round 2: words 1,3,5,7 then 0,2,4,6 */
+	for (k = 1; k >= 0; k--) {
+		GG(a, b, c, d, data[k], 3);
+		GG(d, a, b, c, data[k + 2], 5);
+		GG(c, d, a, b, data[k + 4], 9);
+		GG(b, c, d, a, data[k + 6], 13);
+	}
+
+	/* Round 3: words 3,7,2,6 then 1,5,0,4 */
+	for (k = 2; k >= 0; k -= 2) {
+		HH(a, b, c, d, data[k + 1], 3);
+		HH(d, a, b, c, data[k + 5], 9);
+		HH(c, d, a, b, data[k], 11);
+		HH(b, c, d, a, data[k + 4], 15);
+	}
+
+	/* Feed the result forward into the running state */
+	hash[0] += a;
+	hash[1] += b;
+	hash[2] += c;
+	hash[3] += d;
+}
+
+/*
+ * Tiny Encryption Algorithm mixing step: 16 rounds that fold data[0..3]
+ * into hash[0] and hash[1].  hash[2], hash[3] and data[4..7] are not used.
+ */
+static void ext2_tea(u32int hash[4], u32int data[8])
+{
+	u32int delta = 0x9E3779B9;
+	u32int v0 = hash[0], v1 = hash[1];
+	u32int sum;
+	int round;
+
+	for (round = 1; round <= 16; round++) {
+		sum = round * delta;
+		v0 += ((v1 << 4) + data[0]) ^ (v1 + sum) ^ ((v1 >> 5) + data[1]);
+		v1 += ((v0 << 4) + data[2]) ^ (v0 + sum) ^ ((v0 >> 5) + data[3]);
+	}
+
+	hash[0] += v0;
+	hash[1] += v1;
+}
+
+/*
+ * Legacy directory hash.
+ *
+ * name          - bytes to hash (need not be NUL terminated)
+ * len           - number of bytes of name to process
+ * unsigned_char - non-zero: treat bytes as unsigned, zero: as signed
+ *
+ * Returns the final state shifted left by one bit.
+ */
+static u32int ext2_legacy_hash(const char *name, int len, int unsigned_char)
+{
+	const unsigned char *ubytes = (const unsigned char *)name;
+	const signed char *sbytes = (const signed char *)name;
+	u32int older = 0x37ABE8F9, newer = 0x12A3FE2D;
+	u32int multi = 0x6D22F5;
+	u32int mixed;
+	int val, pos;
+
+	for (pos = 0; pos < len; pos++) {
+		if (unsigned_char)
+			val = (unsigned int)ubytes[pos];
+		else
+			val = (int)sbytes[pos];
+
+		mixed = older + (newer ^ (val * multi));
+		if (mixed & 0x80000000)
+			mixed -= 0x7FFFFFFF;
+
+		older = newer;
+		newer = mixed;
+	}
+
+	return (newer << 1);
+}
+
+/*
+ * Pack up to dlen bytes of src into 32-bit words at dst, four bytes per
+ * word, and fill the rest of the dlen-byte output with a padding word
+ * made of the low byte pattern of slen.
+ *
+ * src           - name bytes
+ * slen          - number of bytes available at src
+ * dst           - output buffer of dlen bytes
+ * dlen          - output size in bytes
+ * unsigned_char - non-zero: treat bytes as unsigned, zero: as signed
+ */
+static void ext2_prep_hashbuf(const char *src, u32int slen, u32int *dst,
+			      int dlen, int unsigned_char)
+{
+	const unsigned char *ubytes = (const unsigned char *)src;
+	const signed char *sbytes = (const signed char *)src;
+	u32int pad = slen | (slen << 8) | (slen << 16) | (slen << 24);
+	u32int word = pad;
+	int nbytes, ch, pos;
+
+	/* Never consume more input than fits into the output */
+	nbytes = (slen > (u32int)dlen) ? dlen : (int)slen;
+
+	for (pos = 0; pos < nbytes; pos++) {
+		if (unsigned_char)
+			ch = (unsigned int)ubytes[pos];
+		else
+			ch = (int)sbytes[pos];
+
+		/* word starts as pad at the beginning of every group of 4 */
+		word = (word << 8) + ch;
+
+		if ((pos % 4) == 3) {
+			*dst++ = word;
+			dlen -= sizeof(u32int);
+			word = pad;
+		}
+	}
+
+	/* Flush the (possibly partial) last word if there is room for it */
+	dlen -= sizeof(u32int);
+	if (dlen >= 0)
+		*dst++ = word;
+
+	/* Fill whatever is left of the output with padding words */
+	for (dlen -= sizeof(u32int); dlen >= 0; dlen -= sizeof(u32int))
+		*dst++ = pad;
+}
+
+/*
+ * Compute the htree (directory index) hash of a name.
+ *
+ * name         - name bytes; must not be nil
+ * len          - name length, 1..255
+ * hash_seed    - optional four-word seed replacing the default state
+ * hash_version - one of the EXT2_HTREE_* hash versions
+ * hash_major   - output: major hash (lowest bit cleared); must not be nil
+ * hash_minor   - optional output: minor hash
+ *
+ * Returns 0 on success.  Returns -1 when name or hash_major is nil, and
+ * -1 with the outputs zeroed and an error string set through werrstr()
+ * when the length is out of range or the hash version is not supported.
+ */
+int ext2_htree_hash(const char *name, int len, const u32int *hash_seed,
+		    int hash_version, u32int *hash_major,
+		    u32int *hash_minor)
+{
+	u32int hash[4];
+	u32int data[8];
+	u32int major, minor = 0;
+	int unsigned_char = 0;
+
+	if (!name || !hash_major)
+		return (-1);
+
+	if (len < 1 || len > 255) {
+		/* Report the real cause instead of blaming the hash version */
+		werrstr("invalid name length: %d", len);
+		goto error;
+	}
+
+	hash[0] = 0x67452301;
+	hash[1] = 0xEFCDAB89;
+	hash[2] = 0x98BADCFE;
+	hash[3] = 0x10325476;
+
+	if (hash_seed)
+		memcpy(hash, hash_seed, sizeof(hash));
+
+	switch (hash_version) {
+	case EXT2_HTREE_TEA_UNSIGNED:
+		unsigned_char = 1;
+		/* FALLTHRU */
+	case EXT2_HTREE_TEA:
+		/* The name is consumed 16 bytes at a time */
+		while (len > 0) {
+			ext2_prep_hashbuf(name, len, data, 16, unsigned_char);
+			ext2_tea(hash, data);
+			len -= 16;
+			name += 16;
+		}
+		major = hash[0];
+		minor = hash[1];
+		break;
+	case EXT2_HTREE_LEGACY_UNSIGNED:
+		unsigned_char = 1;
+		/* FALLTHRU */
+	case EXT2_HTREE_LEGACY:
+		major = ext2_legacy_hash(name, len, unsigned_char);
+		break;
+	case EXT2_HTREE_HALF_MD4_UNSIGNED:
+		unsigned_char = 1;
+		/* FALLTHRU */
+	case EXT2_HTREE_HALF_MD4:
+		/* The name is consumed 32 bytes at a time */
+		while (len > 0) {
+			ext2_prep_hashbuf(name, len, data, 32, unsigned_char);
+			ext2_half_md4(hash, data);
+			len -= 32;
+			name += 32;
+		}
+		major = hash[1];
+		minor = hash[2];
+		break;
+	default:
+		werrstr("unsupported hash version: %d", hash_version);
+		goto error;
+	}
+
+	/* Lowest bit is always clear; the EOF marker value is avoided */
+	major &= ~1;
+	if (major == (EXT2_HTREE_EOF << 1))
+		major = (EXT2_HTREE_EOF - 1) << 1;
+	*hash_major = major;
+	if (hash_minor)
+		*hash_minor = minor;
+
+	return 0;
+
+error:
+	/* The error string has been set by whoever jumped here */
+	*hash_major = 0;
+	if (hash_minor)
+		*hash_minor = 0;
+	return -1;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_ialloc.c
@@ -1,0 +1,313 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_trans.h"
+#include "ext4_ialloc.h"
+#include "ext4_super.h"
+#include "ext4_crc32.h"
+#include "ext4_fs.h"
+#include "ext4_blockdev.h"
+#include "ext4_block_group.h"
+#include "ext4_bitmap.h"
+
+/**@brief  Convert i-node number to relative index in block group.
+ * @param sb    Superblock
+ * @param inode I-node number to be converted (numbering starts at 1)
+ * @return Index of the i-node in the block group
+ */
+static u32int ext4_ialloc_inode_to_bgidx(struct ext4_sblock *sb,
+					   u32int inode)
+{
+	u32int per_group = ext4_get32(sb, inodes_per_group);
+	u32int zero_based = inode - 1;
+
+	return zero_based % per_group;
+}
+
+/**@brief Convert relative index of i-node to absolute i-node number.
+ * @param sb    Superblock
+ * @param index Index of the i-node inside its block group
+ * @param bgid  Block group the index is relative to
+ * @return Absolute number of the i-node (numbering starts at 1)
+ */
+static u32int ext4_ialloc_bgidx_to_inode(struct ext4_sblock *sb,
+					   u32int index, u32int bgid)
+{
+	u32int per_group = ext4_get32(sb, inodes_per_group);
+	u32int group_base = bgid * per_group;
+
+	return group_base + (index + 1);
+}
+
+/**@brief Compute block group number from the i-node number.
+ * @param sb    Superblock
+ * @param inode I-node number to find the block group for (starts at 1)
+ * @return Block group number computed from i-node number
+ */
+static u32int ext4_ialloc_get_bgid_of_inode(struct ext4_sblock *sb,
+					      u32int inode)
+{
+	u32int per_group = ext4_get32(sb, inodes_per_group);
+	u32int zero_based = inode - 1;
+
+	return zero_based / per_group;
+}
+
+/**@brief Compute the checksum of an i-node bitmap.
+ * @param sb     Superblock
+ * @param bitmap I-node bitmap data
+ * @return crc32c over the filesystem uuid followed by the bitmap bytes,
+ *         or 0 when the METADATA_CSUM feature is not enabled
+ */
+static u32int ext4_ialloc_bitmap_csum(struct ext4_sblock *sb,	void *bitmap)
+{
+	u32int per_group, seed;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return 0;
+
+	per_group = ext4_get32(sb, inodes_per_group);
+
+	/* The fs uuid seeds the checksum ... */
+	seed = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+
+	/* ... which then covers one bit per i-node, rounded up to bytes */
+	return ext4_crc32c(seed, bitmap, (per_group + 7) / 8);
+}
+
+/**@brief Store the i-node bitmap checksum in a block group descriptor.
+ *
+ * Does nothing unless the METADATA_CSUM feature is enabled.  The high
+ * 16 bits are stored only when the descriptor size equals
+ * EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ *
+ * @param sb     Superblock
+ * @param bg     Block group descriptor to update
+ * @param bitmap I-node bitmap data the checksum is computed over
+ */
+void ext4_ialloc_set_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg, void *bitmap)
+{
+	u32int sum;
+	u16int lo, hi;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return;
+
+	sum = ext4_ialloc_bitmap_csum(sb, bitmap);
+	lo = to_le16(sum & 0xFFFF);
+	hi = to_le16(sum >> 16);
+
+	bg->inode_bitmap_csum_lo = lo;
+
+	/* See if we need to assign a 32bit checksum */
+	if (ext4_sb_get_desc_size(sb) == EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		bg->inode_bitmap_csum_hi = hi;
+}
+
+/**@brief Check the stored i-node bitmap checksum of a block group.
+ * @param sb     Superblock
+ * @param bg     Block group descriptor holding the stored checksum
+ * @param bitmap I-node bitmap data to verify
+ * @return true when METADATA_CSUM is disabled or the checksum matches
+ *         (high half compared only for descriptors of size
+ *         EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE), false otherwise
+ */
+static bool
+ext4_ialloc_verify_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg, void *bitmap)
+{
+	u32int sum;
+	u16int want_lo, want_hi;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+
+	sum = ext4_ialloc_bitmap_csum(sb, bitmap);
+	want_lo = to_le16(sum & 0xFFFF);
+	want_hi = to_le16(sum >> 16);
+
+	if (bg->inode_bitmap_csum_lo != want_lo)
+		return false;
+
+	if (ext4_sb_get_desc_size(sb) == EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE &&
+	    bg->inode_bitmap_csum_hi != want_hi)
+		return false;
+
+	return true;
+}
+
+/**@brief Release an i-node: clear its bitmap bit and update the counters.
+ * @param fs     Filesystem the i-node belongs to
+ * @param index  Absolute i-node number to free
+ * @param is_dir true when the i-node is a directory (the used directories
+ *               count of the block group is decremented as well)
+ * @return 0 on success, otherwise the error of the failing helper
+ */
+int ext4_ialloc_free_inode(struct ext4_fs *fs, u32int index, bool is_dir)
+{
+	struct ext4_sblock *sb = &fs->sb;
+
+	/* Compute index of block group and load it */
+	u32int block_group = ext4_ialloc_get_bgid_of_inode(sb, index);
+
+	struct ext4_block_group_ref bg_ref;
+	int rc = ext4_fs_get_block_group_ref(fs, block_group, &bg_ref);
+	if (rc != 0)
+		return rc;
+
+	struct ext4_bgroup *bg = bg_ref.block_group;
+
+	/* Load i-node bitmap */
+	ext4_fsblk_t bitmap_block_addr =
+	    ext4_bg_get_inode_bitmap(bg, sb);
+
+	struct ext4_block b;
+	rc = ext4_trans_block_get(fs->bdev, &b, bitmap_block_addr);
+	if (rc != 0) {
+		/* Do not leak the block group reference taken above */
+		ext4_fs_put_block_group_ref(&bg_ref);
+		return rc;
+	}
+
+	/* A checksum mismatch is only reported, not treated as an error */
+	if (!ext4_ialloc_verify_bitmap_csum(sb, bg, b.data)) {
+		ext4_dbg(DEBUG_IALLOC,
+			DBG_WARN "Bitmap checksum failed."
+			"Group: %ud\n",
+			bg_ref.index);
+	}
+
+	/* Free i-node in the bitmap */
+	u32int index_in_group = ext4_ialloc_inode_to_bgidx(sb, index);
+	ext4_bmap_bit_clr(b.data, index_in_group);
+	ext4_ialloc_set_bitmap_csum(sb, bg, b.data);
+	ext4_trans_set_block_dirty(b.buf);
+
+	/* Put back the block with bitmap */
+	rc = ext4_block_set(fs->bdev, &b);
+	if (rc != 0) {
+		/* Error in saving bitmap */
+		ext4_fs_put_block_group_ref(&bg_ref);
+		return rc;
+	}
+
+	/* If released i-node is a directory, decrement used directories count
+	 */
+	if (is_dir) {
+		u32int bg_used_dirs = ext4_bg_get_used_dirs_count(bg, sb);
+		bg_used_dirs--;
+		ext4_bg_set_used_dirs_count(bg, sb, bg_used_dirs);
+	}
+
+	/* Update block group free inodes count */
+	u32int free_inodes = ext4_bg_get_free_inodes_count(bg, sb);
+	free_inodes++;
+	ext4_bg_set_free_inodes_count(bg, sb, free_inodes);
+
+	bg_ref.dirty = true;
+
+	/* Put back the modified block group */
+	rc = ext4_fs_put_block_group_ref(&bg_ref);
+	if (rc != 0)
+		return rc;
+
+	/* Update superblock free inodes count */
+	ext4_set32(sb, free_inodes_count,
+		   ext4_get32(sb, free_inodes_count) + 1);
+
+	return 0;
+}
+
+/**@brief Allocate a free i-node.
+ *
+ * The search starts at fs->last_inode_bg_id, runs to the last block
+ * group and then wraps around once to cover the groups before the
+ * starting one.
+ *
+ * @param fs     Filesystem to allocate the i-node in
+ * @param idx    Output: absolute number of the allocated i-node
+ * @param is_dir true when the i-node will be a directory (the used
+ *               directories count of the block group is incremented)
+ * @return 0 on success; -1 with Enospc set through werrstr() when no
+ *         block group has a free i-node; otherwise the error of the
+ *         failing helper
+ */
+int ext4_ialloc_alloc_inode(struct ext4_fs *fs, u32int *idx, bool is_dir)
+{
+	struct ext4_sblock *sb = &fs->sb;
+
+	u32int bgid = fs->last_inode_bg_id;
+	u32int bg_count = ext4_block_group_cnt(sb);
+	u32int sb_free_inodes = ext4_get32(sb, free_inodes_count);
+	bool rewind = false;
+
+	/* Try to find free i-node in all block groups */
+	while (bgid <= bg_count) {
+
+		if (bgid == bg_count) {
+			/* End of the current pass */
+			if (rewind)
+				break;
+			/* Second pass: groups 0 .. starting group - 1 */
+			bg_count = fs->last_inode_bg_id;
+			bgid = 0;
+			rewind = true;
+			continue;
+		}
+
+		/* Load block group to check */
+		struct ext4_block_group_ref bg_ref;
+		int rc = ext4_fs_get_block_group_ref(fs, bgid, &bg_ref);
+		if (rc != 0)
+			return rc;
+
+		struct ext4_bgroup *bg = bg_ref.block_group;
+
+		/* Read necessary values for algorithm */
+		u32int free_inodes = ext4_bg_get_free_inodes_count(bg, sb);
+		u32int used_dirs = ext4_bg_get_used_dirs_count(bg, sb);
+
+		/* Check if this block group is good candidate for allocation */
+		if (free_inodes > 0) {
+			/* Load block with bitmap */
+			ext4_fsblk_t bmp_blk_add = ext4_bg_get_inode_bitmap(bg, sb);
+
+			struct ext4_block b;
+			rc = ext4_trans_block_get(fs->bdev, &b, bmp_blk_add);
+			if (rc != 0) {
+				ext4_fs_put_block_group_ref(&bg_ref);
+				return rc;
+			}
+
+			/* A checksum mismatch is only reported */
+			if (!ext4_ialloc_verify_bitmap_csum(sb, bg, b.data)) {
+				ext4_dbg(DEBUG_IALLOC,
+					DBG_WARN "Bitmap checksum failed."
+					"Group: %ud\n",
+					bg_ref.index);
+			}
+
+			/* Try to allocate i-node in the bitmap */
+			u32int inodes_in_bg;
+			u32int idx_in_bg;
+
+			inodes_in_bg = ext4_inodes_in_group_cnt(sb, bgid);
+			bool no_space;
+			rc = ext4_bmap_bit_find_clr(b.data, 0, inodes_in_bg, &idx_in_bg, &no_space);
+			/* Block group does not have any free i-node */
+			if (no_space) {
+				rc = ext4_block_set(fs->bdev, &b);
+				if (rc != 0) {
+					ext4_fs_put_block_group_ref(&bg_ref);
+					return rc;
+				}
+
+				rc = ext4_fs_put_block_group_ref(&bg_ref);
+				if (rc != 0)
+					return rc;
+
+				/*
+				 * Move on to the next group; without this
+				 * the same group would be retried forever
+				 * when its free count disagrees with its
+				 * bitmap.
+				 */
+				++bgid;
+				continue;
+			}
+
+			/*
+			 * The search failed for another reason: idx_in_bg
+			 * is not valid, so leave the bitmap untouched.
+			 */
+			if (rc != 0) {
+				ext4_block_set(fs->bdev, &b);
+				ext4_fs_put_block_group_ref(&bg_ref);
+				return rc;
+			}
+
+			ext4_bmap_bit_set(b.data, idx_in_bg);
+
+			/* Free i-node found, save the bitmap */
+			ext4_ialloc_set_bitmap_csum(sb,bg,
+						    b.data);
+			ext4_trans_set_block_dirty(b.buf);
+
+			/* The result must be captured for the check below */
+			rc = ext4_block_set(fs->bdev, &b);
+			if (rc != 0) {
+				ext4_fs_put_block_group_ref(&bg_ref);
+				return rc;
+			}
+
+			/* Modify filesystem counters */
+			free_inodes--;
+			ext4_bg_set_free_inodes_count(bg, sb, free_inodes);
+
+			/* Increment used directories counter */
+			if (is_dir) {
+				used_dirs++;
+				ext4_bg_set_used_dirs_count(bg, sb, used_dirs);
+			}
+
+			/* Decrease unused inodes count */
+			u32int unused =
+			    ext4_bg_get_itable_unused(bg, sb);
+
+			/* Number of i-node table entries already in use */
+			u32int free = inodes_in_bg - unused;
+
+			if (idx_in_bg >= free) {
+				unused = inodes_in_bg - (idx_in_bg + 1);
+				ext4_bg_set_itable_unused(bg, sb, unused);
+			}
+
+			/* Save modified block group */
+			bg_ref.dirty = true;
+
+			rc = ext4_fs_put_block_group_ref(&bg_ref);
+			if (rc != 0)
+				return rc;
+
+			/* Update superblock */
+			sb_free_inodes--;
+			ext4_set32(sb, free_inodes_count, sb_free_inodes);
+
+			/* Compute the absolute i-node number */
+			*idx = ext4_ialloc_bgidx_to_inode(sb, idx_in_bg, bgid);
+
+			/* Start the next search at this group */
+			fs->last_inode_bg_id = bgid;
+
+			return 0;
+		}
+
+		/* Block group not modified, put it and jump to the next block
+		 * group */
+		rc = ext4_fs_put_block_group_ref(&bg_ref);
+		if (rc != 0)
+			return rc;
+
+		++bgid;
+	}
+
+	werrstr(Enospc);
+	return -1;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_inode.c
@@ -1,0 +1,365 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_inode.h"
+#include "ext4_super.h"
+
+/**@brief  Count the bits of a block size (log2 for powers of two >= 512).
+ * @param block_size Filesystem block size
+ * @return 9 plus the number of further halvings needed to reach <= 256
+ */
+static u32int ext4_inode_block_bits_count(u32int block_size)
+{
+	u32int nbits;
+	u32int rest;
+
+	/* The first halving is unconditional, exactly like a do/while. */
+	nbits = 9;
+	for (rest = block_size >> 1; rest > 256; rest >>= 1)
+		nbits++;
+
+	return nbits;
+}
+
+u32int ext4_inode_get_mode(struct ext4_sblock *sb, struct ext4_inode *inode)
+{
+	u32int lo = to_le16(inode->mode);
+
+	/* Only Hurd-created filesystems carry the upper 16 mode bits. */
+	if (ext4_get32(sb, creator_os) != EXT4_SUPERBLOCK_OS_HURD)
+		return lo;
+
+	return lo | (((u32int)to_le16(inode->osd2.hurd2.mode_high)) << 16);
+}
+
+void ext4_inode_set_mode(struct ext4_sblock *sb, struct ext4_inode *inode,
+			 u32int mode)
+{
+	/* Low half always; high half only on Hurd-created filesystems. */
+	inode->mode = to_le16(mode & 0xFFFF);
+	if (ext4_get32(sb, creator_os) == EXT4_SUPERBLOCK_OS_HURD)
+		inode->osd2.hurd2.mode_high = to_le16(mode >> 16);
+}
+
+u32int ext4_inode_get_uid(struct ext4_inode *inode)
+{
+	return to_le32(inode->uid);	/* uid field, converted with to_le32 */
+}
+
+void ext4_inode_set_uid(struct ext4_inode *inode, u32int uid)
+{
+	inode->uid = to_le32(uid);	/* overwrites the uid field in place */
+}
+
+u64int ext4_inode_get_size(struct ext4_sblock *sb, struct ext4_inode *inode)
+{
+	u64int size = to_le32(inode->size_lo);
+
+	/* size_hi is honoured only for regular files when rev_level > 0. */
+	if (!(ext4_get32(sb, rev_level) > 0) ||
+	    !ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_FILE))
+		return size;
+	return size | (((u64int)to_le32(inode->size_hi)) << 32);
+}
+
+void ext4_inode_set_size(struct ext4_inode *inode, u64int size)
+{
+	inode->size_lo = to_le32((size << 32) >> 32);	/* low 32 bits */
+	inode->size_hi = to_le32(size >> 32);	/* high 32 bits, written for every inode type */
+}
+
+u32int ext4_inode_get_csum(struct ext4_sblock *sb, struct ext4_inode *inode)
+{
+	u16int isize = ext4_get16(sb, inode_size);
+	u32int csum = to_le16(inode->osd2.linux2.checksum_lo);
+
+	/* checksum_hi is only read when inodes exceed the good-old size. */
+	if (isize <= EXT4_GOOD_OLD_INODE_SIZE)
+		return csum;
+	return csum | (((u32int)to_le16(inode->checksum_hi)) << 16);
+}
+
+void ext4_inode_set_csum(struct ext4_sblock *sb, struct ext4_inode *inode,
+			u32int checksum)
+{
+	u16int isize = ext4_get16(sb, inode_size);
+
+	/* The low half is always stored; the high half needs a large inode. */
+	inode->osd2.linux2.checksum_lo = to_le16(checksum & 0xFFFF);
+	if (isize <= EXT4_GOOD_OLD_INODE_SIZE)
+		return;
+	inode->checksum_hi = to_le16(checksum >> 16);
+}
+
+u32int ext4_inode_get_access_time(struct ext4_inode *inode)
+{
+	return to_le32(inode->access_time);	/* access_time field via to_le32 */
+}
+void ext4_inode_set_access_time(struct ext4_inode *inode, u32int time)
+{
+	inode->access_time = to_le32(time);	/* only 32 bits of time are kept */
+}
+
+u32int ext4_inode_get_change_inode_time(struct ext4_inode *inode)
+{
+	return to_le32(inode->change_inode_time);	/* inode-change time field */
+}
+void ext4_inode_set_change_inode_time(struct ext4_inode *inode, u32int time)
+{
+	inode->change_inode_time = to_le32(time);	/* only 32 bits are kept */
+}
+
+u32int ext4_inode_get_modif_time(struct ext4_inode *inode)
+{
+	return to_le32(inode->modification_time);	/* modification time field */
+}
+
+void ext4_inode_set_modif_time(struct ext4_inode *inode, u32int time)
+{
+	inode->modification_time = to_le32(time);	/* only 32 bits are kept */
+}
+
+u32int ext4_inode_get_del_time(struct ext4_inode *inode)
+{
+	return to_le32(inode->deletion_time);	/* deletion time field */
+}
+
+void ext4_inode_set_del_time(struct ext4_inode *inode, u32int time)
+{
+	inode->deletion_time = to_le32(time);	/* only 32 bits are kept */
+}
+
+u32int ext4_inode_get_creation_time(struct ext4_inode *inode)
+{
+	return to_le32(inode->crtime);	/* crtime field; no setter in this file section */
+}
+
+u32int ext4_inode_get_gid(struct ext4_inode *inode)
+{
+	return to_le32(inode->gid);	/* gid field, converted with to_le32 */
+}
+void ext4_inode_set_gid(struct ext4_inode *inode, u32int gid)
+{
+	inode->gid = to_le32(gid);	/* overwrites the gid field in place */
+}
+
+u16int ext4_inode_get_links_cnt(struct ext4_inode *inode)
+{
+	return to_le16(inode->links_count);	/* 16-bit link counter */
+}
+void ext4_inode_set_links_cnt(struct ext4_inode *inode, u16int cnt)
+{
+	inode->links_count = to_le16(cnt);	/* stored as given, no range check */
+}
+
+u64int ext4_inode_get_blocks_count(struct ext4_sblock *sb,
+				     struct ext4_inode *inode)
+{
+	u64int nblk = to_le32(inode->blocks_count_lo);
+	u32int bsize, shift;
+
+	/* Without the huge_file feature only the low 32 bits count. */
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_HUGE_FILE))
+		return nblk;
+
+	/* With it the counter is 48 bits wide. */
+	nblk |= (u64int)to_le16(inode->osd2.linux2.blocks_high) << 32;
+	if (!ext4_inode_has_flag(inode, EXT4_INODE_FLAG_HUGE_FILE))
+		return nblk;
+
+	/* Flagged inodes hold a scaled-down count; shift it back up. */
+	bsize = ext4_sb_get_block_size(sb);
+	shift = ext4_inode_block_bits_count(bsize) - 9;
+	return nblk << shift;
+}
+
+int ext4_inode_set_blocks_count(struct ext4_sblock *sb,
+				struct ext4_inode *inode, u64int count)
+{
+	u64int lim32 = ~(u64int)0 >> 32;	/* largest 32-bit count */
+	u64int lim48 = ~(u64int)0 >> 16;	/* largest 48-bit count */
+	u32int bsize, bits;
+
+	/* Small counts fit the low word alone; no huge-file marking needed. */
+	if (count <= lim32) {
+		inode->blocks_count_lo = to_le32((u32int)count);
+		inode->osd2.linux2.blocks_high = 0;
+		ext4_inode_clear_flag(inode, EXT4_INODE_FLAG_HUGE_FILE);
+
+		return 0;
+	}
+
+	/* Anything larger requires the huge_file read-only-compat feature. */
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_HUGE_FILE)) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	if (count > lim48) {
+		/* Too big even for 48 bits: flag the inode and store the
+		 * count scaled down by (block bits - 9). */
+		bsize = ext4_sb_get_block_size(sb);
+		bits = ext4_inode_block_bits_count(bsize);
+
+		ext4_inode_set_flag(inode, EXT4_INODE_FLAG_HUGE_FILE);
+		count >>= bits - 9;
+	} else {
+		/* Fits in 48 bits: stored unscaled, so the flag must be off. */
+		ext4_inode_clear_flag(inode, EXT4_INODE_FLAG_HUGE_FILE);
+	}
+
+	/* Split the (possibly scaled) count over the lo/high fields. */
+	inode->blocks_count_lo = to_le32((u32int)count);
+	inode->osd2.linux2.blocks_high = to_le16((u16int)(count >> 32));
+
+	return 0;
+}
+
+u32int ext4_inode_get_flags(struct ext4_inode *inode)
+{
+	return to_le32(inode->flags);	/* whole flag word; see ext4_inode_has_flag */
+}
+void ext4_inode_set_flags(struct ext4_inode *inode, u32int flags)
+{
+	inode->flags = to_le32(flags);	/* replaces the whole flag word */
+}
+
+u32int ext4_inode_get_generation(struct ext4_inode *inode)
+{
+	return to_le32(inode->generation);	/* generation field via to_le32 */
+}
+void ext4_inode_set_generation(struct ext4_inode *inode, u32int gen)
+{
+	inode->generation = to_le32(gen);	/* overwrites the generation field */
+}
+
+u16int ext4_inode_get_extra_isize(struct ext4_sblock *sb,
+				    struct ext4_inode *inode)
+{
+	u16int isize = ext4_get16(sb, inode_size);
+	/* Reported as 0 unless inodes exceed the good-old size. */
+	if (isize <= EXT4_GOOD_OLD_INODE_SIZE)
+		return 0;
+	return to_le16(inode->extra_isize);
+}
+
+void ext4_inode_set_extra_isize(struct ext4_sblock *sb,
+				struct ext4_inode *inode, u16int size)
+{
+	u16int isize = ext4_get16(sb, inode_size);
+	/* A no-op unless inodes exceed the good-old size. */
+	if (isize > EXT4_GOOD_OLD_INODE_SIZE)
+		inode->extra_isize = to_le16(size);
+}
+
+u64int ext4_inode_get_file_acl(struct ext4_inode *inode,
+				 struct ext4_sblock *sb)
+{
+	u64int v = to_le32(inode->file_acl_lo);
+
+	/* High 16 bits sit above the 32-bit low word, as set_file_acl stores. */
+	if (ext4_get32(sb, creator_os) == EXT4_SUPERBLOCK_OS_LINUX)
+		v |= (u64int)to_le16(inode->osd2.linux2.file_acl_high) << 32;
+	return v;
+}
+
+void ext4_inode_set_file_acl(struct ext4_inode *inode, struct ext4_sblock *sb,
+			     u64int acl)
+{
+	inode->file_acl_lo = to_le32((acl << 32) >> 32);
+	/* Bits 32-47 are stored only on Linux-created filesystems. */
+	if (ext4_get32(sb, creator_os) == EXT4_SUPERBLOCK_OS_LINUX)
+		inode->osd2.linux2.file_acl_high = to_le16((u16int)(acl >> 32));
+}
+
+u32int ext4_inode_get_direct_block(struct ext4_inode *inode, u32int idx)
+{
+	return to_le32(inode->blocks[idx]);	/* idx is not range-checked here */
+}
+void ext4_inode_set_direct_block(struct ext4_inode *inode, u32int idx,
+				 u32int block)
+{
+	inode->blocks[idx] = to_le32(block);	/* idx is not range-checked here */
+}
+
+u32int ext4_inode_get_indirect_block(struct ext4_inode *inode, u32int idx)
+{
+	return to_le32(inode->blocks[idx + EXT4_INODE_INDIRECT_BLOCK]); /* idx counts from the first indirect slot */
+}
+
+void ext4_inode_set_indirect_block(struct ext4_inode *inode, u32int idx,
+				   u32int block)
+{
+	inode->blocks[idx + EXT4_INODE_INDIRECT_BLOCK] = to_le32(block); /* idx counts from the first indirect slot */
+}
+
+u32int ext4_inode_get_dev(struct ext4_inode *inode)
+{
+	u32int slot0 = ext4_inode_get_direct_block(inode, 0);
+	u32int slot1 = ext4_inode_get_direct_block(inode, 1);
+
+	/*
+	 * The device number is kept in one of the first two block slots;
+	 * slot 0 takes precedence whenever it is non-zero.
+	 */
+	return slot0 ? slot0 : slot1;
+}
+
+void ext4_inode_set_dev(struct ext4_inode *inode, u32int dev)
+{
+	/* Numbers wider than 16 bits go to slot 1, the rest to slot 0. */
+	u32int slot = (dev & ~0xFFFF) ? 1 : 0;
+
+	ext4_inode_set_direct_block(inode, slot, dev);
+}
+
+u32int ext4_inode_type(struct ext4_sblock *sb, struct ext4_inode *inode)
+{
+	return (ext4_inode_get_mode(sb, inode) & EXT4_INODE_MODE_TYPE_MASK); /* type bits only */
+}
+
+bool ext4_inode_is_type(struct ext4_sblock *sb, struct ext4_inode *inode,
+			u32int type)
+{
+	return ext4_inode_type(sb, inode) == type;	/* exact match on the masked type bits */
+}
+
+bool ext4_inode_has_flag(struct ext4_inode *inode, u32int f)
+{
+	return (ext4_inode_get_flags(inode) & f) != 0;	/* 0 or 1, whatever width bool has */
+}
+
+void ext4_inode_clear_flag(struct ext4_inode *inode, u32int f)
+{
+	/* Read-modify-write: drop every bit of f, keep the others. */
+	u32int kept = ext4_inode_get_flags(inode) & ~f;
+
+	ext4_inode_set_flags(inode, kept);
+}
+
+void ext4_inode_set_flag(struct ext4_inode *inode, u32int f)
+{
+	/* Read-modify-write: turn on every bit of f, keep the others. */
+	u32int merged = ext4_inode_get_flags(inode) | f;
+
+	ext4_inode_set_flags(inode, merged);
+}
+
+bool ext4_inode_can_truncate(struct ext4_sblock *sb, struct ext4_inode *inode)
+{
+	/* Append-only and immutable inodes are never truncated. */
+	if (ext4_inode_has_flag(inode, EXT4_INODE_FLAG_APPEND))
+		return false;
+	if (ext4_inode_has_flag(inode, EXT4_INODE_FLAG_IMMUTABLE))
+		return false;
+
+	/* Beyond that, only files, directories and symlinks qualify. */
+	return ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_FILE) ||
+	       ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_DIRECTORY) ||
+	       ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_SOFTLINK);
+}
+
+struct ext4_extent_header *
+ext4_inode_get_extent_header(struct ext4_inode *inode)
+{
+	return (struct ext4_extent_header *)inode->blocks; /* blocks[] reinterpreted in place, no copy */
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_journal.c
@@ -1,0 +1,2232 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_fs.h"
+#include "ext4_super.h"
+#include "ext4_journal.h"
+#include "ext4_blockdev.h"
+#include "ext4_crc32.h"
+#include "ext4_journal.h"
+
+/**@brief  Revoke entry during journal replay; looked up by block number.*/
+struct revoke_entry {
+	/**@brief  Block number not to be replayed.*/
+	ext4_fsblk_t block;
+
+	/**@brief  Transaction id of the revocation. During
+	 *         replay, records of @block are skipped in
+	 *         transactions whose id is not newer than
+	 *         this one (see jbd_replay_block_tags).*/
+	u32int trans_id;
+
+	/**@brief  Revoke tree node.*/
+	RB_ENTRY(revoke_entry) revoke_node;
+};
+
+/**@brief  Valid journal replay information.*/
+struct recover_info {
+	/**@brief  Starting transaction id.*/
+	u32int start_trans_id;
+
+	/**@brief  Ending transaction id.*/
+	u32int last_trans_id;
+
+	/**@brief  Transaction id in progress; copied into revoke entries.*/
+	u32int this_trans_id;
+
+	/**@brief  Number of transactions gone through.*/
+	u32int trans_cnt;
+
+	/**@brief  RB-Tree storing revoke entries, ordered by block number.*/
+	RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
+};
+
+/**@brief  Journal replay internal arguments (passed as void *arg).*/
+struct replay_arg {
+	/**@brief  Journal replay information.*/
+	struct recover_info *info;
+
+	/**@brief  Journal block cursor; advanced and wrapped per tag.*/
+	u32int *this_block;
+
+	/**@brief  Current trans_id we are on.*/
+	u32int this_trans_id;
+};
+
+/* Log indices are circular: at or past maxlen, var is folded back to begin at first. */
+#define wrap(sb, var)						\
+do {									\
+	if (var >= jbd_get32((sb), maxlen))					\
+		var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first));	\
+} while (0)
+
+static inline s32int
+trans_id_diff(u32int x, u32int y)
+{
+	/* Unsigned subtraction wraps; the result is then read as signed. */
+	return (s32int)(x - y);
+}
+
+static int
+jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
+{
+	/* Three-way comparison on the block number alone. */
+	if (a->block == b->block)
+		return 0;
+
+	return a->block > b->block ? 1 : -1;
+}
+
+static int
+jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
+{
+	/*
+	 * Order records by logical block address:
+	 * yields 1, -1 or 0 without any branching.
+	 */
+	return (a->lba > b->lba) - (a->lba < b->lba);
+}
+
+static int
+jbd_revoke_rec_cmp(struct jbd_revoke_rec *a, struct jbd_revoke_rec *b)
+{
+	/* Revoke records are keyed by their lba. */
+	if (a->lba < b->lba)
+		return -1;
+
+	return a->lba > b->lba ? 1 : 0;
+}
+
+RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
+		     jbd_revoke_entry_cmp, static inline) /* tree behind recover_info.revoke_root */
+RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
+		     jbd_block_rec_cmp, static inline) /* block records ordered by lba */
+RB_GENERATE_INTERNAL(jbd_revoke_tree, jbd_revoke_rec, revoke_node,
+		     jbd_revoke_rec_cmp, static inline) /* revoke records ordered by lba */
+
+#define jbd_alloc_revoke_entry() ext4_calloc(1, sizeof(struct revoke_entry)) /* one entry per call */
+#define jbd_free_revoke_entry(addr) ext4_free(addr) /* counterpart of the allocator above */
+
+static int jbd_has_csum(struct jbd_sb *jbd_sb)
+{
+	/* Checksum version from the incompat feature bits: 2 or 3, else 0.
+	 * V2 is checked first, so it wins if both bits are set. */
+	if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
+		return 2;
+
+	return JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3)
+		? 3 : 0;
+}
+
+static u32int jbd_sb_csum(struct jbd_sb *jbd_sb)
+{
+	u32int saved, crc;
+
+	if (!jbd_has_csum(jbd_sb))
+		return 0;
+
+	/* crc32c of the superblock with its checksum field temporarily zeroed. */
+	saved = jbd_sb->checksum;
+	jbd_set32(jbd_sb, checksum, 0);
+	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, JBD_SUPERBLOCK_SIZE);
+	jbd_sb->checksum = saved;
+	return crc;
+}
+
+static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
+{
+	/* Stamp a fresh checksum, but only on checksummed journals. */
+	if (jbd_has_csum(jbd_sb)) {
+		jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
+	}
+}
+
+static bool
+jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
+{
+	/* Journals without checksums always pass. */
+	if (jbd_has_csum(jbd_sb))
+		return jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);
+	return true;
+}
+
+static u32int jbd_meta_csum(struct jbd_fs *jbd_fs,
+			      struct jbd_bhdr *bhdr)
+{
+	struct jbd_block_tail *tail;
+	u32int bsize, saved, crc;
+
+	if (!jbd_has_csum(&jbd_fs->sb))
+		return 0;
+
+	/* The checksum tail occupies the last bytes of the block. */
+	bsize = jbd_get32(&jbd_fs->sb, blocksize);
+	tail = (struct jbd_block_tail *)((char *)bhdr + bsize -
+					 sizeof(struct jbd_block_tail));
+
+	/* Hash uuid, then the block with its checksum zeroed; restore after. */
+	saved = tail->checksum;
+	tail->checksum = 0;
+	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
+			  sizeof(jbd_fs->sb.uuid));
+	crc = ext4_crc32c(crc, bhdr, bsize);
+	tail->checksum = saved;
+	return crc;
+}
+
+static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
+			      struct jbd_bhdr *bhdr)
+{
+	u32int bsize = jbd_get32(&jbd_fs->sb, blocksize);
+	char *end = (char *)bhdr + bsize;
+	struct jbd_block_tail *tail;
+
+	/* The tail sits at the very end of the block; stored via to_be32. */
+	tail = (struct jbd_block_tail *)(end - sizeof(struct jbd_block_tail));
+	if (jbd_has_csum(&jbd_fs->sb))
+		tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
+}
+
+static bool
+jbd_verify_meta_csum(struct jbd_fs *jbd_fs, struct jbd_bhdr *bhdr)
+{
+	u32int bsize = jbd_get32(&jbd_fs->sb, blocksize);
+	char *end = (char *)bhdr + bsize;
+	struct jbd_block_tail *tail;
+
+	/* Compare against the value kept in the block tail. */
+	tail = (struct jbd_block_tail *)(end - sizeof(struct jbd_block_tail));
+	if (jbd_has_csum(&jbd_fs->sb))
+		return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
+	return true;
+}
+
+static u32int jbd_commit_csum(struct jbd_fs *jbd_fs,
+			      struct jbd_commit_header *header)
+{
+	u8int saved_type, saved_size;
+	u32int saved_sum, bsize, crc;
+
+	if (!jbd_has_csum(&jbd_fs->sb))
+		return 0;
+
+	/* Remember the checksum fields, then blank them for hashing. */
+	saved_type = header->chksum_type;
+	saved_size = header->chksum_size;
+	saved_sum = header->chksum[0];
+	bsize = jbd_get32(&jbd_fs->sb, blocksize);
+	header->chksum_type = 0;
+	header->chksum_size = 0;
+	header->chksum[0] = 0;
+
+	/* crc32c seeded with the journal uuid, then over the whole block. */
+	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
+			  sizeof(jbd_fs->sb.uuid));
+	crc = ext4_crc32c(crc, header, bsize);
+	header->chksum_type = saved_type;
+	header->chksum_size = saved_size;
+	header->chksum[0] = saved_sum;
+	return crc;
+}
+
+static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
+			      struct jbd_commit_header *header)
+{
+	if (!jbd_has_csum(&jbd_fs->sb))
+		return;
+	/* Store big-endian, the form jbd_verify_commit_csum() compares with. */
+	header->chksum_type = 0;
+	header->chksum_size = 0;
+	header->chksum[0] = to_be32(jbd_commit_csum(jbd_fs, header));
+}
+
+static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
+				   struct jbd_commit_header *header)
+{
+	/* Nothing to check on journals without checksums. */
+	if (jbd_has_csum(&jbd_fs->sb))
+		return header->chksum[0] ==
+		       to_be32(jbd_commit_csum(jbd_fs, header));
+	return true;
+}
+
+/*
+ * NOTE: @csum is only used as the crc32 seed in the COMPAT_CHECKSUM branch.
+ * NOTE(review): that branch tests a COMPAT flag via the INCOMPAT macro - confirm.
+ */
+static u32int jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
+			       u32int csum,
+			       u32int sequence)
+{
+	u32int checksum = 0;
+
+	if (jbd_has_csum(&jbd_fs->sb)) {
+		u32int block_size = jbd_get32(&jbd_fs->sb, blocksize);
+		/* First calculate crc32c checksum against fs uuid */
+		checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
+				       sizeof(jbd_fs->sb.uuid));
+		/* Then fold in the four bytes of @sequence exactly as passed in */
+		checksum = ext4_crc32c(checksum, &sequence,
+				sizeof(u32int));
+		/* Finally fold in the whole block */
+		checksum = ext4_crc32c(checksum, buf,
+				block_size);
+	} else if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+				     JBD_FEATURE_COMPAT_CHECKSUM)) {
+		u32int block_size = jbd_get32(&jbd_fs->sb, blocksize);
+		/* ext4_crc32 (not crc32c) over the whole block, seeded with @csum */
+		checksum = ext4_crc32(csum, buf,
+				block_size);
+	}
+	return checksum;
+}
+
+static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
+				   u32int checksum)
+{
+	int ver = jbd_has_csum(&jbd_fs->sb);
+	if (!ver)
+		return;
+	if (ver == 2) {
+		struct jbd_block_tag *tag = __tag;
+		/* v2 tags keep the low 16 bits: truncate first, then swap. */
+		jbd_set16(tag, checksum, (u16int)checksum);
+	} else {
+		struct jbd_block_tag3 *tag = __tag;
+		tag->checksum = to_be32(checksum);
+	}
+}
+
+/**@brief  Write jbd superblock to disk, refreshing its checksum first.
+ * @param  jbd_fs jbd filesystem
+ * @param  s jbd superblock
+ * @return standard error code*/
+static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
+{
+	int rc;
+	struct ext4_fs *fs = jbd_fs->inode_ref.fs;
+	u64int offset;
+	ext4_fsblk_t fblock;
+	/* The jbd superblock is block 0 of the journal inode. */
+	rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
+	if (rc != 0)
+		return rc;
+	jbd_sb_csum_set(s);
+	offset = fblock * ext4_sb_get_block_size(&fs->sb);
+	/* Same extent the checksum covers, not the ext4 superblock's. */
+	return ext4_block_writebytes(fs->bdev, offset, s, JBD_SUPERBLOCK_SIZE);
+}
+
+/**@brief  Read jbd superblock from disk (no validation is done here).
+ * @param  jbd_fs jbd filesystem
+ * @param  s jbd superblock
+ * @return standard error code*/
+static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
+{
+	int rc;
+	struct ext4_fs *fs = jbd_fs->inode_ref.fs;
+	u64int offset;
+	ext4_fsblk_t fblock;
+	/* The jbd superblock is block 0 of the journal inode. */
+	rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
+	if (rc != 0)
+		return rc;
+	offset = fblock * ext4_sb_get_block_size(&fs->sb);
+	/* Sized like the checksum code, not like the ext4 superblock. */
+	return ext4_block_readbytes(fs->bdev, offset, s, JBD_SUPERBLOCK_SIZE);
+}
+
+/**@brief  Sanity-check a jbd superblock: magic, block type, checksum.
+ * @param  sb jbd superblock
+ * @return true if every check passes */
+static bool jbd_verify_sb(struct jbd_sb *sb)
+{
+	struct jbd_bhdr *hdr = &sb->header;
+	u32int type;
+	if (jbd_get32(hdr, magic) != JBD_MAGIC_NUMBER)
+		return false;
+	/* Either superblock block type (JBD_SUPERBLOCK or _V2) is accepted. */
+	type = jbd_get32(hdr, blocktype);
+	if (type != JBD_SUPERBLOCK && type != JBD_SUPERBLOCK_V2)
+		return false;
+	return jbd_verify_sb_csum(sb);
+}
+
+/**@brief  Flush the in-memory jbd superblock if it is marked dirty.
+ * @param  jbd_fs jbd filesystem
+ * @return 0 when clean or written, else the jbd_sb_write() error*/
+static int jbd_write_sb(struct jbd_fs *jbd_fs)
+{
+	int rc;
+
+	if (!jbd_fs->dirty)
+		return 0;
+
+	rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
+	if (rc == 0)
+		jbd_fs->dirty = false;	/* stays dirty on failure */
+	return rc;
+}
+
+/**@brief  Open the journal: grab the journal inode and load its superblock.
+ * @param  fs Filesystem to load journal of
+ * @param  jbd_fs jbd filesystem, zeroed first and again on failure
+ * @return standard error code*/
+int jbd_get_fs(struct ext4_fs *fs,
+	       struct jbd_fs *jbd_fs)
+{
+	u32int ino;
+	int rc;
+
+	memset(jbd_fs, 0, sizeof(struct jbd_fs));
+
+	/* The journal lives in the inode named by the ext4 superblock.
+	 * FIXME: detection of whether a journal bdev exists is missing. */
+	ino = ext4_get32(&fs->sb, journal_inode_number);
+	rc = ext4_fs_get_inode_ref(fs, ino, &jbd_fs->inode_ref);
+	if (rc != 0)
+		return rc;
+
+	rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
+	if (rc != 0)
+		goto Error;
+
+	/* A superblock that fails verification is reported as Eio. */
+	if (!jbd_verify_sb(&jbd_fs->sb)) {
+		werrstr(Eio);
+		rc = -1;
+		goto Error;
+	}
+
+	/* Success: the journal shares the filesystem's block device. */
+	jbd_fs->bdev = fs->bdev;
+
+	return rc;
+
+Error:
+	/* Undo the inode reference and leave the caller a blank struct. */
+	ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
+	memset(jbd_fs, 0, sizeof(struct jbd_fs));
+
+	return rc;
+}
+
+/**@brief  Release a jbd filesystem obtained with jbd_get_fs().
+ * @param  jbd_fs jbd filesystem
+ * @return result of writing back the (dirty) jbd superblock*/
+int jbd_put_fs(struct jbd_fs *jbd_fs)
+{
+	int rc = jbd_write_sb(jbd_fs);
+
+	/* The inode reference is dropped even when the write failed. */
+	ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
+	return rc;
+}
+
+/**@brief  Map a block index inside the journal inode to a filesystem block.
+ * @param  jbd_fs jbd filesystem
+ * @param  iblock block index within the journal inode
+ * @param  fblock receives the resulting block address
+ * @return standard error code*/
+int jbd_inode_bmap(struct jbd_fs *jbd_fs,
+		   ext4_lblk_t iblock,
+		   ext4_fsblk_t *fblock)
+{
+	/*
+	 * Pure pass-through to the inode block mapper; the final
+	 * argument is always false here.
+	 */
+	return ext4_fs_get_inode_dblk_idx(&jbd_fs->inode_ref, iblock,
+					  fblock, false);
+}
+
+/**@brief   jbd block get function (through cache).
+ * @param   jbd_fs jbd filesystem
+ * @param   block block descriptor
+ * @param   fblock jbd logical block address
+ * @return  standard error code*/
+static int jbd_block_get(struct jbd_fs *jbd_fs,
+		  struct ext4_block *block,
+		  ext4_fsblk_t fblock)
+{
+	/* TODO: journal device. */
+
+	ext4_lblk_t idx = (ext4_lblk_t)fblock;
+	ext4_fsblk_t phys;
+	int rc;
+
+	/* On entry fblock is an index into the journal inode;
+	 * translate it to a block address on the device. */
+	rc = jbd_inode_bmap(jbd_fs, idx, &phys);
+	if (rc != 0)
+		return rc;
+
+	rc = ext4_block_get(jbd_fs->bdev, block, phys);
+	if (rc != 0)
+		return rc;
+
+	/* BC_FLUSH: data should be written to disk immediately. */
+	ext4_bcache_set_flag(block->buf, BC_FLUSH);
+
+	/* BC_TMP: keep the journal from occupying too much
+	 * space in the block cache. */
+	ext4_bcache_set_flag(block->buf, BC_TMP);
+
+	return 0;
+}
+
+/**@brief   jbd block get function (through cache, don't read).
+ * @param   jbd_fs jbd filesystem
+ * @param   block block descriptor
+ * @param   fblock jbd logical block address
+ * @return  standard error code*/
+static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
+			 struct ext4_block *block,
+			 ext4_fsblk_t fblock)
+{
+	/* TODO: journal device. */
+	ext4_lblk_t idx = (ext4_lblk_t)fblock;
+	int rc;
+
+	/* Translate the journal-relative index to a device block. */
+	rc = jbd_inode_bmap(jbd_fs, idx, &fblock);
+	if (rc != 0)
+		return rc;
+
+	rc = ext4_block_get_noread(jbd_fs->bdev, block, fblock);
+	if (rc != 0)
+		return rc;
+	ext4_bcache_set_flag(block->buf, BC_FLUSH);
+	return 0;
+}
+
+/**@brief   jbd block set procedure (through cache).
+ * @param   jbd_fs jbd filesystem
+ * @param   block block descriptor
+ * @return  standard error code*/
+static int jbd_block_set(struct jbd_fs *jbd_fs,
+		  struct ext4_block *block)
+{
+	/* Hand the block back through the journal's block device. */
+	return ext4_block_set(jbd_fs->bdev, block);
+}
+
+/**@brief  helper functions to calculate
+ *         block tag size, not including UUID part.
+ * @param  jbd_fs jbd filesystem
+ * @return tag size in bytes*/
+static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
+{
+	struct jbd_sb *jsb = &jbd_fs->sb;
+	int nbytes;
+
+	/* CSUM_V3 journals use a tag layout of their own, always
+	 * at full size. */
+	if (JBD_HAS_INCOMPAT_FEATURE(jsb, JBD_FEATURE_INCOMPAT_CSUM_V3))
+		return sizeof(struct jbd_block_tag3);
+
+	/* Otherwise start from the classic tag
+	 * and adjust it feature by feature. */
+	nbytes = sizeof(struct jbd_block_tag);
+
+	/* CSUM_V2 appends a 16-bit field,
+	 * i.e. two more bytes. */
+	if (JBD_HAS_INCOMPAT_FEATURE(jsb, JBD_FEATURE_INCOMPAT_CSUM_V2))
+		nbytes += sizeof(u16int);
+
+	/* Without 64BIT the block number is only 4 bytes wide,
+	 * so 4 bytes are taken off the tag. */
+	if (!JBD_HAS_INCOMPAT_FEATURE(jsb, JBD_FEATURE_INCOMPAT_64BIT))
+		nbytes -= sizeof(u32int);
+
+	return nbytes;
+}
+
+/**@brief  Decoded form of one journal block tag. */
+struct tag_info {
+	/**@brief  Tag size in bytes, including UUID part.*/
+	int tag_bytes;
+
+	/**@brief  Block number stored in this tag.*/
+	ext4_fsblk_t block;
+
+	/**@brief  Set when the tag carries JBD_FLAG_ESCAPE; replay then
+	 *	   writes JBD_MAGIC_NUMBER back into the block's first word. */
+	bool is_escape;
+
+	/**@brief  Whether a UUID part follows the tag.*/
+	bool uuid_exist;
+
+	/**@brief  UUID content if UUID part exists.*/
+	u8int uuid[UUID_SIZE];
+
+	/**@brief  Is this the last tag? */
+	bool last_tag;
+
+	/**@brief  Checksum used when writing a tag; not set on extract. */
+	u32int checksum;
+};
+
+/**@brief  Extract information from a block tag.
+ * @param  __tag pointer to the block tag
+ * @param  tag_bytes block tag size of this jbd filesystem, without UUID
+ * @param  remain_buf_size bytes left in the buffer holding the tag
+ * @param  tag_info filled in; its checksum member is left untouched
+ * @return 0 on success, -1 with errstr Einval if the tag does not fit.*/
+static int
+jbd_extract_block_tag(struct jbd_fs *jbd_fs,
+		      void *__tag,
+		      int tag_bytes,
+		      s32int remain_buf_size,
+		      struct tag_info *tag_info)
+{
+	char *uuid_start;
+	tag_info->tag_bytes = tag_bytes;
+	tag_info->uuid_exist = false;
+	tag_info->last_tag = false;
+	tag_info->is_escape = false;
+
+	/* Bail out if the buffer cannot hold even the fixed-size tag.*/
+	if (remain_buf_size - tag_bytes < 0) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+				     JBD_FEATURE_INCOMPAT_CSUM_V3)) {
+		struct jbd_block_tag3 *tag = __tag;
+		tag_info->block = jbd_get32(tag, blocknr);
+		if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+					     JBD_FEATURE_INCOMPAT_64BIT))
+			 tag_info->block |=
+				 (u64int)jbd_get32(tag, blocknr_high) << 32;
+
+		if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
+			tag_info->is_escape = true;
+
+		if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
+			/* The UUID follows the tag; make sure it fits too.*/
+			if (remain_buf_size - tag_bytes < UUID_SIZE) {
+				werrstr(Einval);
+				return -1;
+			}
+
+			uuid_start = (char *)tag + tag_bytes;
+			tag_info->uuid_exist = true;
+			tag_info->tag_bytes += UUID_SIZE;
+			memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
+		}
+
+		if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
+			tag_info->last_tag = true;
+
+	} else {
+		struct jbd_block_tag *tag = __tag;
+		tag_info->block = jbd_get32(tag, blocknr);
+		if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+					     JBD_FEATURE_INCOMPAT_64BIT))
+			 tag_info->block |=
+				 (u64int)jbd_get32(tag, blocknr_high) << 32;
+
+		if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
+			tag_info->is_escape = true;
+
+		if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
+			/* The UUID follows the tag; make sure it fits too.*/
+			if (remain_buf_size - tag_bytes < UUID_SIZE) {
+				werrstr(Einval);
+				return -1;
+			}
+
+			uuid_start = (char *)tag + tag_bytes;
+			tag_info->uuid_exist = true;
+			tag_info->tag_bytes += UUID_SIZE;
+			memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
+		}
+
+		if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
+			tag_info->last_tag = true;
+
+	}
+	return 0;
+}
+
+/**@brief  Write information to a block tag.
+ * @param  __tag pointer to the block tag
+ * @param  remain_buf_size bytes left in the buffer that receives the tag
+ * @param  tag_info tag contents; tag_bytes is set to the size written
+ * @return 0 on success, -1 with errstr Einval if the tag does not fit.*/
+static int
+jbd_write_block_tag(struct jbd_fs *jbd_fs,
+		    void *__tag,
+		    s32int remain_buf_size,
+		    struct tag_info *tag_info)
+{
+	char *uuid_start;
+	int tag_bytes = jbd_tag_bytes(jbd_fs);
+
+	tag_info->tag_bytes = tag_bytes;
+
+	/* Bail out if the buffer cannot hold even the fixed-size tag.*/
+	if (remain_buf_size - tag_bytes < 0) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+				     JBD_FEATURE_INCOMPAT_CSUM_V3)) {
+		struct jbd_block_tag3 *tag = __tag;
+		memset(tag, 0, sizeof(struct jbd_block_tag3));
+		jbd_set32(tag, blocknr, (u32int)tag_info->block);
+		if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+					     JBD_FEATURE_INCOMPAT_64BIT))
+			jbd_set32(tag, blocknr_high, tag_info->block >> 32);
+
+		if (tag_info->uuid_exist) {
+			/* The UUID follows the tag; make sure it fits too.*/
+			if (remain_buf_size - tag_bytes < UUID_SIZE) {
+				werrstr(Einval);
+				return -1;
+			}
+
+			uuid_start = (char *)tag + tag_bytes;
+			tag_info->tag_bytes += UUID_SIZE;
+			memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
+		} else
+			jbd_set32(tag, flags,
+				  jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
+
+		jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
+
+		if (tag_info->last_tag)
+			jbd_set32(tag, flags,
+				  jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
+
+		if (tag_info->is_escape)
+			jbd_set32(tag, flags,
+				  jbd_get32(tag, flags) | JBD_FLAG_ESCAPE);
+
+	} else {
+		struct jbd_block_tag *tag = __tag;
+		memset(tag, 0, sizeof(struct jbd_block_tag));
+		jbd_set32(tag, blocknr, (u32int)tag_info->block);
+		if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+					     JBD_FEATURE_INCOMPAT_64BIT))
+			jbd_set32(tag, blocknr_high, tag_info->block >> 32);
+
+		if (tag_info->uuid_exist) {
+			/* The UUID follows the tag; make sure it fits too.*/
+			if (remain_buf_size - tag_bytes < UUID_SIZE) {
+				werrstr(Einval);
+				return -1;
+			}
+
+			uuid_start = (char *)tag + tag_bytes;
+			tag_info->tag_bytes += UUID_SIZE;
+			memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
+		} else
+			jbd_set16(tag, flags,
+				  jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
+
+		jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
+
+		if (tag_info->last_tag)
+			jbd_set16(tag, flags,
+				  jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
+
+
+		if (tag_info->is_escape)
+			jbd_set16(tag, flags,
+				  jbd_get16(tag, flags) | JBD_FLAG_ESCAPE);
+
+	}
+	return 0;
+}
+
+/**@brief  Iterate all block tags in a block.
+ * @param  jbd_fs jbd filesystem
+ * @param  __tag_start pointer to the block
+ * @param  tag_tbl_size size of the block
+ * @param  func callback routine to indicate that
+ *         a block tag is found
+ * @param  arg additional argument to be passed to func */
+static void
+jbd_iterate_block_table(struct jbd_fs *jbd_fs,
+			void *__tag_start,
+			s32int tag_tbl_size,
+			void (*func)(struct jbd_fs * jbd_fs,
+				     struct tag_info *tag_info,
+				     void *arg),
+			void *arg)
+{
+	struct tag_info cur;
+	char *pos = __tag_start;
+	int base_bytes = jbd_tag_bytes(jbd_fs);
+
+	/* Checksummed journals end the block with a tail that is
+	 * not part of the tag table. */
+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+				     JBD_FEATURE_INCOMPAT_CSUM_V2) ||
+	    JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+				     JBD_FEATURE_INCOMPAT_CSUM_V3))
+		tag_tbl_size -= sizeof(struct jbd_block_tail);
+
+	/* Walk the tags one by one; each extraction reports how many
+	 * bytes the tag really occupies. */
+	while (tag_tbl_size != 0) {
+		/* A tag that no longer fits ends the walk. */
+		if (jbd_extract_block_tag(jbd_fs, pos, base_bytes,
+					  tag_tbl_size, &cur) != 0)
+			break;
+
+		/* The callback is optional. */
+		if (func)
+			func(jbd_fs, &cur, arg);
+
+		/* The tag flagged as last terminates the table. */
+		if (cur.last_tag)
+			break;
+
+		/* Step over this tag, including any UUID part. */
+		pos += cur.tag_bytes;
+		tag_tbl_size -= cur.tag_bytes;
+	}
+}
+
+static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
+				   struct tag_info *tag_info,
+				   void *arg)
+{
+	u32int *cursor = arg;
+	/* USED() presumably silences a warning if ext4_dbg compiles away. */
+	USED(tag_info);
+	ext4_dbg(DEBUG_JBD, "Block in block_tag: %llud\n", tag_info->block);
+	/* Each tag advances the caller's block cursor by one, with wrap. */
+	*cursor += 1;
+	wrap(&jbd_fs->sb, *cursor);
+}
+
+static struct revoke_entry *
+jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
+{
+	struct revoke_entry key = { .block = block };
+
+	/* A throwaway key is enough: jbd_revoke_entry_cmp() compares
+	 * the block field and nothing else. */
+	return RB_FIND(jbd_revoke, &info->revoke_root, &key);
+}
+
+/**@brief  Replay a block in a transaction.
+ * @param  jbd_fs jbd filesystem
+ * @param  tag_info tag_info of the logged block.*/
+static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
+				  struct tag_info *tag_info,
+				  void *__arg)
+{
+	int r;
+	struct replay_arg *arg = __arg;
+	struct recover_info *info = arg->info;
+	u32int *this_block = arg->this_block;
+	struct revoke_entry *revoke_entry;
+	struct ext4_block journal_block, ext4_block;
+	struct ext4_fs *fs = jbd_fs->inode_ref.fs;
+
+	(*this_block)++;
+	wrap(&jbd_fs->sb, *this_block);
+
+	/* We replay this block only if the current transaction id
+	 * is newer than the one recorded in its revoke entry.*/
+	revoke_entry = jbd_revoke_entry_lookup(info, tag_info->block);
+	if (revoke_entry &&
+	    trans_id_diff(arg->this_trans_id, revoke_entry->trans_id) <= 0)
+		return;
+
+	ext4_dbg(DEBUG_JBD,
+		 "Replaying block in block_tag: %llud\n",
+		 tag_info->block);
+
+	r = jbd_block_get(jbd_fs, &journal_block, *this_block);
+	if (r != 0)
+		return;
+
+	/* Block 0 holds the ext4 superblock and is handled separately. */
+	if (tag_info->block) {
+		r = ext4_block_get_noread(fs->bdev, &ext4_block, tag_info->block);
+		if (r != 0) {
+			jbd_block_set(jbd_fs, &journal_block);
+			return;
+		}
+
+		memcpy(ext4_block.data,
+			journal_block.data,
+			jbd_get32(&jbd_fs->sb, blocksize));
+
+		if (tag_info->is_escape)
+			((struct jbd_bhdr *)ext4_block.data)->magic =
+					to_be32(JBD_MAGIC_NUMBER);
+
+		ext4_bcache_set_dirty(ext4_block.buf);
+		ext4_block_set(fs->bdev, &ext4_block);
+	} else {
+		u16int mount_count, state;
+		mount_count = ext4_get16(&fs->sb, mount_count);
+		state = ext4_get16(&fs->sb, state);
+
+		memcpy(&fs->sb,
+			journal_block.data + EXT4_SUPERBLOCK_OFFSET,
+			EXT4_SUPERBLOCK_SIZE);
+
+		/* Keep the in-memory state field rather than the journalled one */
+		ext4_set16(&fs->sb, state, state);
+		r = ext4_sb_write(fs->bdev, &fs->sb);
+		/* Restore the mount count only if the write went through.
+		 * On failure fall through anyway, so that journal_block
+		 * is released below instead of being leaked. */
+		if (r == 0)
+			ext4_set16(&fs->sb, mount_count, mount_count);
+	}
+
+	jbd_block_set(jbd_fs, &journal_block);
+
+	return;
+}
+
+/**@brief  Record a revoked block address in the revoke tree, tagged
+ *         with the id of the transaction currently being scanned.
+ * @param  info  journal replay info
+ * @param  block  block address that has been revoked.*/
+static void jbd_add_revoke_block_tags(struct recover_info *info,
+				      ext4_fsblk_t block)
+{
+	struct revoke_entry *entry;
+
+	ext4_dbg(DEBUG_JBD, "Add block %llud to revoke tree\n", block);
+
+	entry = jbd_revoke_entry_lookup(info, block);
+	if (entry == nil) {
+		/* First revocation seen for this address: create a
+		 * fresh node and hook it into the tree. */
+		entry = jbd_alloc_revoke_entry();
+		assert(entry);
+		entry->block = block;
+		entry->trans_id = info->this_trans_id;
+		RB_INSERT(jbd_revoke, &info->revoke_root, entry);
+		return;
+	}
+
+	/* The address is already in the tree; only its transaction id
+	 * needs refreshing. */
+	entry->trans_id = info->this_trans_id;
+}
+
+/* Tear down the revoke tree in `info`: repeatedly unlink the minimum
+ * node and free it until the tree reports empty. */
+static void jbd_destroy_revoke_tree(struct recover_info *info)
+{
+	struct revoke_entry *victim;
+
+	for (;;) {
+		if (RB_EMPTY(&info->revoke_root))
+			break;
+
+		victim = RB_MIN(jbd_revoke, &info->revoke_root);
+		assert(victim);
+		RB_REMOVE(jbd_revoke, &info->revoke_root, victim);
+		jbd_free_revoke_entry(victim);
+	}
+}
+
+
+#define ACTION_SCAN 0 /* jbd_iterate_log(): count transactions and find the last transaction id */
+#define ACTION_REVOKE 1 /* jbd_iterate_log(): feed revoke blocks to jbd_build_revoke_tree() */
+#define ACTION_RECOVER 2 /* jbd_iterate_log(): replay the blocks listed in descriptor blocks */
+
+/**@brief  Add entries in a revoke block to revoke tree.
+ *
+ *         The header's count field is the number of bytes used in the
+ *         block, header included. It comes from disk, so it is checked
+ *         against the header size and the journal block size before any
+ *         record is read; a block with an impossible count is ignored.
+ * @param  jbd_fs jbd filesystem
+ * @param  header revoke block header
+ * @param  info  journal replay info*/
+static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
+				  struct jbd_bhdr *header,
+				  struct recover_info *info)
+{
+	char *blocks_entry;
+	struct jbd_revoke_header *revoke_hdr =
+		(struct jbd_revoke_header *)header;
+	u32int i, nr_entries, record_len = 4;
+	u32int count, blocksize;
+
+	/* If we are working on a 64bit jbd filesystem, */
+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+				     JBD_FEATURE_INCOMPAT_64BIT))
+		record_len = 8;
+
+	count = jbd_get32(revoke_hdr, count);
+	blocksize = jbd_get32(&jbd_fs->sb, blocksize);
+
+	/* A count smaller than the header would underflow the unsigned
+	 * subtraction below, and one larger than the block would make
+	 * the loop read past the end of the block buffer. */
+	if (count < (u32int)sizeof(struct jbd_revoke_header) ||
+	    count > blocksize) {
+		ext4_dbg(DEBUG_JBD,
+			 DBG_WARN "Revoke block has invalid byte count: %ud\n",
+			 count);
+		return;
+	}
+
+	nr_entries = (count -
+			(u32int)sizeof(struct jbd_revoke_header)) /
+			record_len;
+
+	blocks_entry = (char *)(revoke_hdr + 1);
+
+	for (i = 0;i < nr_entries;i++) {
+		if (record_len == 8) {
+			u64int *blocks =
+				(u64int *)blocks_entry;
+			jbd_add_revoke_block_tags(info, to_be64(*blocks));
+		} else {
+			u32int *blocks =
+				(u32int *)blocks_entry;
+			jbd_add_revoke_block_tags(info, to_be32(*blocks));
+		}
+		blocks_entry += record_len;
+	}
+}
+
+/* Walk the tag table of a descriptor block without replaying anything;
+ * each tag is handed to jbd_display_block_tags() together with the
+ * journal cursor `iblock`. */
+static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
+				       struct jbd_bhdr *header,
+				       u32int *iblock)
+{
+	/* The tag table starts right behind the block header and spans
+	 * the rest of the journal block. */
+	struct jbd_bhdr *tag_table = header + 1;
+	u32int table_bytes = jbd_get32(&jbd_fs->sb, blocksize) -
+				sizeof(struct jbd_bhdr);
+
+	jbd_iterate_block_table(jbd_fs, tag_table, table_bytes,
+				jbd_display_block_tags, iblock);
+}
+
+/* Walk the tag table of a descriptor block and replay every tagged
+ * block through jbd_replay_block_tags(), passing `arg` along. */
+static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
+					struct jbd_bhdr *header,
+					struct replay_arg *arg)
+{
+	/* The tag table starts right behind the block header and spans
+	 * the rest of the journal block. */
+	struct jbd_bhdr *tag_table = header + 1;
+	u32int table_bytes = jbd_get32(&jbd_fs->sb, blocksize) -
+				sizeof(struct jbd_bhdr);
+
+	jbd_iterate_block_table(jbd_fs, tag_table, table_bytes,
+				jbd_replay_block_tags, arg);
+}
+
+/**@brief  The core routine of journal replay.
+ *
+ *         Walks the log from the start block and sequence number stored
+ *         in the journal superblock, one block at a time, and dispatches
+ *         on each block's type. The walk ends at a block with a wrong
+ *         magic number or an unexpected sequence number, at a failed
+ *         checksum, at an unknown block type, or when the cursor wraps
+ *         back to the start block.
+ *
+ *         ACTION_SCAN counts commit blocks into info->trans_cnt and, on
+ *         success, records info->start_trans_id and info->last_trans_id.
+ *         ACTION_REVOKE passes revoke blocks to jbd_build_revoke_tree().
+ *         ACTION_RECOVER replays descriptor blocks through
+ *         jbd_replay_descriptor_block(). The two non-scan passes do
+ *         nothing when info->trans_cnt is zero.
+ * @param  jbd_fs jbd filesystem
+ * @param  info  journal replay info
+ * @param  action action needed to be taken
+ * @return 0 on success; -1 with the error string set to Eio when a
+ *         non-scan pass meets an unexpected transaction id; otherwise
+ *         the non-zero result of a failed jbd_block_get()*/
+static int jbd_iterate_log(struct jbd_fs *jbd_fs,
+			   struct recover_info *info,
+			   int action)
+{
+	int r = 0;
+	bool log_end = false;
+	struct jbd_sb *sb = &jbd_fs->sb;
+	u32int start_trans_id, this_trans_id;
+	u32int start_block, this_block;
+
+	/* We start iterating valid blocks in the whole journal.*/
+	start_trans_id = this_trans_id = jbd_get32(sb, sequence);
+	start_block = this_block = jbd_get32(sb, start);
+	if (action == ACTION_SCAN)
+		info->trans_cnt = 0;
+	else if (!info->trans_cnt)
+		log_end = true; /* the scan pass counted no transaction: skip the loop */
+
+	ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %ud\n",
+			    start_trans_id);
+
+	while (!log_end) {
+		struct ext4_block block;
+		struct jbd_bhdr *header;
+		/* If we are not scanning for the last
+		 * valid transaction in the journal,
+		 * we will stop when we reach the end of
+		 * the journal.*/
+		if (action != ACTION_SCAN)
+			if (trans_id_diff(this_trans_id, info->last_trans_id) > 0) {
+				log_end = true;
+				continue;
+			}
+
+		r = jbd_block_get(jbd_fs, &block, this_block);
+		if (r != 0)
+			break;
+
+		header = (struct jbd_bhdr *)block.data;
+		/* This block does not have a valid magic number,
+		 * so we have reached the end of the journal.*/
+		if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
+			jbd_block_set(jbd_fs, &block);
+			log_end = true;
+			continue;
+		}
+
+		/* If the transaction id we found is not expected,
+		 * we may have reached the end of the journal.
+		 *
+		 * If we are not scanning the journal, something
+		 * bad might have taken place. :-( */
+		if (jbd_get32(header, sequence) != this_trans_id) {
+			if (action != ACTION_SCAN) {
+				werrstr(Eio);
+				r = -1;
+			}
+
+			jbd_block_set(jbd_fs, &block);
+			log_end = true;
+			continue;
+		}
+
+		switch (jbd_get32(header, blocktype)) {
+		case JBD_DESCRIPTOR_BLOCK:
+			if (!jbd_verify_meta_csum(jbd_fs, header)) {
+				ext4_dbg(DEBUG_JBD,
+					DBG_WARN "Descriptor block checksum failed."
+						"Journal block: %ud\n",
+						this_block);
+				log_end = true;
+				break;
+			}
+			ext4_dbg(DEBUG_JBD, "Descriptor block: %ud, "
+					    "trans_id: %ud\n",
+					    this_block, this_trans_id);
+			if (action == ACTION_RECOVER) {
+				struct replay_arg replay_arg;
+				replay_arg.info = info;
+				replay_arg.this_block = &this_block; /* the callback advances the cursor over the data blocks */
+				replay_arg.this_trans_id = this_trans_id;
+
+				jbd_replay_descriptor_block(jbd_fs,
+						header, &replay_arg);
+			} else
+				jbd_debug_descriptor_block(jbd_fs,
+						header, &this_block);
+
+			break;
+		case JBD_COMMIT_BLOCK:
+			if (!jbd_verify_commit_csum(jbd_fs,
+					(struct jbd_commit_header *)header)) {
+				ext4_dbg(DEBUG_JBD,
+					DBG_WARN "Commit block checksum failed."
+						"Journal block: %ud\n",
+						this_block);
+				log_end = true;
+				break;
+			}
+			ext4_dbg(DEBUG_JBD, "Commit block: %ud, "
+					    "trans_id: %ud\n",
+					    this_block, this_trans_id);
+			/*
+			 * This is the end of a transaction,
+			 * we may now proceed to the next transaction.
+			 */
+			this_trans_id++;
+			if (action == ACTION_SCAN)
+				info->trans_cnt++;
+			break;
+		case JBD_REVOKE_BLOCK:
+			if (!jbd_verify_meta_csum(jbd_fs, header)) {
+				ext4_dbg(DEBUG_JBD,
+					DBG_WARN "Revoke block checksum failed."
+						"Journal block: %ud\n",
+						this_block);
+				log_end = true;
+				break;
+			}
+			ext4_dbg(DEBUG_JBD, "Revoke block: %ud, "
+					    "trans_id: %ud\n",
+					    this_block, this_trans_id);
+			if (action == ACTION_REVOKE) {
+				info->this_trans_id = this_trans_id;
+				jbd_build_revoke_tree(jbd_fs,
+						header, info);
+			}
+			break;
+		default:
+			log_end = true; /* unknown block type ends the walk */
+			break;
+		}
+		jbd_block_set(jbd_fs, &block);
+		this_block++;
+		wrap(sb, this_block);
+		if (this_block == start_block)
+			log_end = true; /* wrapped all the way around the log */
+
+	}
+	ext4_dbg(DEBUG_JBD, "End of journal.\n");
+	if (r == 0 && action == ACTION_SCAN) {
+		/* We have finished scanning the journal. */
+		info->start_trans_id = start_trans_id;
+		if (trans_id_diff(this_trans_id, start_trans_id) > 0)
+			info->last_trans_id = this_trans_id - 1;
+		else
+			info->last_trans_id = this_trans_id;
+	}
+
+	return r;
+}
+
+/**@brief  Replay journal.
+ *
+ *         Runs the three passes of jbd_iterate_log() in order (scan,
+ *         revoke, recover), each one only if the previous succeeded.
+ *         On success the journal superblock is marked empty and the
+ *         EXT4_FINCOM_RECOVER flag is cleared from the ext4 superblock.
+ *         The revoke tree is released on every path, including failure
+ *         of an intermediate pass.
+ * @param  jbd_fs jbd filesystem
+ * @return standard error code; 0 when the journal start field is zero
+ *         (nothing to replay) or when the replay succeeded*/
+int jbd_recover(struct jbd_fs *jbd_fs)
+{
+	int r;
+	struct recover_info info;
+	struct jbd_sb *sb = &jbd_fs->sb;
+	if (!sb->start)
+		return 0;
+
+	RB_INIT(&info.revoke_root);
+
+	r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
+	if (r == 0)
+		r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
+	if (r == 0)
+		r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
+
+	if (r == 0) {
+		/* If we successfully replay the journal,
+		 * clear EXT4_FINCOM_RECOVER flag on the
+		 * ext4 superblock, and set the start of
+		 * journal to 0.*/
+		u32int features_incompatible =
+			ext4_get32(&jbd_fs->inode_ref.fs->sb,
+				   features_incompatible);
+		jbd_set32(&jbd_fs->sb, start, 0);
+		jbd_set32(&jbd_fs->sb, sequence, info.last_trans_id);
+		features_incompatible &= ~EXT4_FINCOM_RECOVER;
+		ext4_set32(&jbd_fs->inode_ref.fs->sb,
+			   features_incompatible,
+			   features_incompatible);
+		jbd_fs->dirty = true;
+		r = ext4_sb_write(jbd_fs->bdev,
+				  &jbd_fs->inode_ref.fs->sb);
+	}
+
+	/* Entries may have been added by a revoke pass that failed part
+	 * way through, so the tree is always torn down here. */
+	jbd_destroy_revoke_tree(&info);
+	return r;
+}
+
+/* Copy the session's start block and transaction id into the in-memory
+ * journal superblock and flag the jbd filesystem as dirty. */
+static void jbd_journal_write_sb(struct jbd_journal *journal)
+{
+	struct jbd_sb *jsb = &journal->jbd_fs->sb;
+
+	jbd_set32(jsb, start, journal->start);
+	jbd_set32(jsb, sequence, journal->trans_id);
+	journal->jbd_fs->dirty = true;
+}
+
+/**@brief  Begin a journalling session.
+ *
+ *         Sets EXT4_FINCOM_RECOVER in the ext4 superblock and writes it
+ *         out, initialises the session state in `journal` from the
+ *         journal superblock, writes the journal superblock, and finally
+ *         attaches the session to the block device.
+ * @param  jbd_fs jbd filesystem
+ * @param  journal current journal session
+ * @return standard error code*/
+int jbd_journal_start(struct jbd_fs *jbd_fs,
+		      struct jbd_journal *journal)
+{
+	struct ext4_fs *fs = jbd_fs->inode_ref.fs;
+	u32int incompat;
+	int rc;
+
+	incompat = ext4_get32(&fs->sb, features_incompatible);
+	incompat |= EXT4_FINCOM_RECOVER;
+	ext4_set32(&fs->sb, features_incompatible, incompat);
+	rc = ext4_sb_write(jbd_fs->bdev, &fs->sb);
+	if (rc != 0)
+		return rc;
+
+	/* The log starts out empty: start and last both sit on the
+	 * first usable journal block. */
+	journal->first = jbd_get32(&jbd_fs->sb, first);
+	journal->last = journal->first;
+	journal->start = journal->first;
+
+	/*
+	 * Stale records of the previous session are invalidated by
+	 * numbering from its checkpoint transaction id plus one.
+	 */
+	journal->trans_id = jbd_get32(&jbd_fs->sb, sequence) + 1;
+	journal->alloc_trans_id = journal->trans_id;
+
+	journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
+
+	RB_INIT(&journal->block_rec_root);
+	TAILQ_INIT(&journal->cp_queue);
+	journal->jbd_fs = jbd_fs;
+
+	jbd_journal_write_sb(journal);
+	rc = jbd_write_sb(jbd_fs);
+	if (rc == 0)
+		jbd_fs->bdev->journal = journal;
+
+	return rc;
+}
+
+static void jbd_trans_end_write(struct ext4_bcache *bc,
+			  struct ext4_buf *buf,
+			  int res,
+			  void *arg); /* forward declaration: referenced below before its definition */
+
+/*
+ * Push every buffer still queued on a transaction out to the file
+ * system, one queue entry at a time.
+ *
+ * For each entry the block cache is asked for the target block. When
+ * no cached, up-to-date buffer owned by this transaction is found, the
+ * data is copied from the journal block (jbd_buf->jbd_lba) into a
+ * temporary buffer, written directly to block_rec->lba, and
+ * jbd_trans_end_write() is called by hand with the write result.
+ * Otherwise the cached buffer is flushed with ext4_block_flush_buf().
+ *
+ * Allocation and journal read failures are asserted, not reported.
+ *
+ * This routine is only suitable to committed transactions. */
+static void jbd_journal_flush_trans(struct jbd_trans *trans)
+{
+	struct jbd_buf *jbd_buf, *tmp;
+	struct jbd_journal *journal = trans->journal;
+	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
+	void *tmp_data = ext4_malloc(journal->block_size);
+	assert(tmp_data);
+
+	TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
+		struct ext4_buf *buf;
+		struct ext4_block block;
+		/* The buffer is not yet flushed. */
+		buf = ext4_bcache_find_get(fs->bdev->bc, &block,
+					   jbd_buf->block_rec->lba);
+		if (!(buf && ext4_bcache_test_flag(buf, BC_UPTODATE) &&
+		      jbd_buf->block_rec->trans == trans)) {
+			int r;
+			struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
+			r = jbd_block_get(journal->jbd_fs,
+						&jbd_block,
+						jbd_buf->jbd_lba);
+			assert(r == 0);
+			assert(jbd_block.data != nil);
+			memcpy(tmp_data, jbd_block.data,
+					journal->block_size);
+			ext4_block_set(fs->bdev, &jbd_block); /* NOTE(review): journal block released via fs->bdev rather than jbd_block_set() - confirm both use the same device */
+			r = ext4_blocks_set_direct(fs->bdev, tmp_data,
+					jbd_buf->block_rec->lba, 1);
+			jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
+		} else
+			ext4_block_flush_buf(fs->bdev, buf);
+
+		if (buf)
+			ext4_block_set(fs->bdev, &block); /* drop the reference taken by ext4_bcache_find_get() */
+	}
+
+	ext4_free(tmp_data);
+}
+
+/* Retire a transaction: move the journal start past the blocks the
+ * transaction allocated, bump the session's transaction id past it,
+ * free it (without abort), and refresh the in-memory journal
+ * superblock. */
+static void
+jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
+			     struct jbd_trans *trans)
+{
+	struct jbd_sb *jsb = &journal->jbd_fs->sb;
+
+	journal->trans_id = trans->trans_id + 1;
+	journal->start = trans->start_iblock + trans->alloc_blocks;
+	wrap(jsb, journal->start);
+
+	jbd_journal_free_trans(journal, trans, false);
+	jbd_journal_write_sb(journal);
+}
+
+/* Work through the checkpoint queue from its head.
+ *
+ * A transaction with no data blocks, or whose data blocks have all
+ * been written, is unlinked and retired. A transaction with
+ * outstanding writes is flushed when `flush` is set; otherwise the
+ * journal start is parked on it and the walk stops. With `once` set
+ * at most one queue head is processed. */
+void
+jbd_journal_purge_cp_trans(struct jbd_journal *journal,
+			   bool flush,
+			   bool once)
+{
+	struct jbd_trans *head;
+
+	for (head = TAILQ_FIRST(&journal->cp_queue);
+	     head != nil;
+	     head = TAILQ_FIRST(&journal->cp_queue)) {
+		if (!head->data_cnt ||
+		    head->data_cnt == head->written_cnt) {
+			/* Nothing left to wait for: the helper advances
+			 * start/trans_id, frees the transaction and
+			 * updates the journal superblock. */
+			TAILQ_REMOVE(&journal->cp_queue,
+					head,
+					trans_node);
+			jbd_journal_skip_pure_revoke(journal, head);
+		} else if (flush) {
+			jbd_journal_flush_trans(head);
+		} else {
+			/* Writes are still pending and we may not force
+			 * them: the log now begins at this transaction. */
+			journal->start = head->start_iblock;
+			wrap(&journal->jbd_fs->sb, journal->start);
+			journal->trans_id = head->trans_id;
+			jbd_journal_write_sb(journal);
+			break;
+		}
+
+		if (once)
+			break;
+	}
+}
+
+/**@brief  End a journalling session.
+ *
+ *         Flushes the whole checkpoint queue, clears
+ *         EXT4_FINCOM_RECOVER in the ext4 superblock and writes it, then
+ *         zeroes the journal start and transaction id and writes the
+ *         journal superblock.
+ * @param  journal current journal session
+ * @return standard error code*/
+int jbd_journal_stop(struct jbd_journal *journal)
+{
+	struct jbd_fs *jbd_fs = journal->jbd_fs;
+	struct ext4_fs *fs;
+	u32int incompat;
+	int rc;
+
+	/* Everything that was journalled has to be on disk before the
+	 * log is declared empty. */
+	jbd_journal_purge_cp_trans(journal, true, false);
+
+	/* No block record is expected to survive the purge. */
+	if (!RB_EMPTY(&journal->block_rec_root))
+		ext4_dbg(DEBUG_JBD,
+			 DBG_WARN "There are still block records "
+			 	  "in this journal session!\n");
+
+	fs = jbd_fs->inode_ref.fs;
+	incompat = ext4_get32(&fs->sb, features_incompatible);
+	incompat &= ~EXT4_FINCOM_RECOVER;
+	ext4_set32(&fs->sb, features_incompatible, incompat);
+	rc = ext4_sb_write(jbd_fs->bdev, &fs->sb);
+	if (rc != 0)
+		return rc;
+
+	journal->trans_id = 0;
+	journal->start = 0;
+	jbd_journal_write_sb(journal);
+
+	rc = jbd_write_sb(jbd_fs);
+	return rc;
+}
+
+/**@brief  Hand out the next free block of the journal.
+ *
+ *         Advances journal->last (with wrap-around) and charges the
+ *         block to `trans`. When the log becomes full, one checkpointed
+ *         transaction is flushed to make room; this is asserted to
+ *         succeed.
+ * @param  journal current journal session
+ * @param  trans transaction
+ * @return allocated block address*/
+static u32int jbd_journal_alloc_block(struct jbd_journal *journal,
+					struct jbd_trans *trans)
+{
+	u32int allocated = journal->last;
+
+	journal->last++;
+	trans->alloc_blocks++;
+	wrap(&journal->jbd_fs->sb, journal->last);
+
+	if (journal->last != journal->start)
+		return allocated;
+
+	/* Tail caught up with the head: reclaim the space of exactly
+	 * one journalled transaction. */
+	jbd_journal_purge_cp_trans(journal, true, true);
+	assert(journal->last != journal->start);
+
+	return allocated;
+}
+
+/* Search the session-wide block record tree for the record of `lba`;
+ * the result of RB_FIND is handed back unchanged. */
+static struct jbd_block_rec *
+jbd_trans_block_rec_lookup(struct jbd_journal *journal,
+			   ext4_fsblk_t lba)
+{
+	struct jbd_block_rec *found;
+	struct jbd_block_rec key = {
+		.lba = lba
+	};
+
+	found = RB_FIND(jbd_block, &journal->block_rec_root, &key);
+	return found;
+}
+
+/* Detach a block record from the transaction list it is on and attach
+ * it to `new_trans`. With a nil `new_trans` the record is only
+ * detached and left without an owner. */
+static void
+jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
+			   struct jbd_trans *new_trans)
+{
+	LIST_REMOVE(block_rec, tbrec_node);
+	block_rec->trans = new_trans;
+
+	if (new_trans == nil)
+		return;
+
+	/* From now on the record is listed under the new transaction. */
+	LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
+}
+
+/* Return the block record for `lba`, owned by `trans`.
+ *
+ * An existing record is re-parented to `trans`; otherwise a new,
+ * zeroed record is allocated and linked into both the transaction's
+ * list and the journal's record tree. Yields nil when the allocation
+ * fails. */
+static inline struct jbd_block_rec *
+jbd_trans_insert_block_rec(struct jbd_trans *trans,
+			   ext4_fsblk_t lba)
+{
+	struct jbd_block_rec *rec;
+
+	rec = jbd_trans_block_rec_lookup(trans->journal, lba);
+	if (rec != nil) {
+		jbd_trans_change_ownership(rec, trans);
+		return rec;
+	}
+
+	rec = ext4_calloc(1, sizeof(struct jbd_block_rec));
+	if (rec == nil)
+		return nil;
+
+	rec->trans = trans;
+	rec->lba = lba;
+	TAILQ_INIT(&rec->dirty_buf_queue);
+
+	LIST_INSERT_HEAD(&trans->tbrec_list, rec, tbrec_node);
+	RB_INSERT(jbd_block, &trans->journal->block_rec_root, rec);
+	return rec;
+}
+
+/*
+ * This routine will do the dirty works.
+ *
+ * It acts only when `block_rec` is currently owned by `trans`.
+ *
+ * Without `abort`, jbd_trans_end_write() is called (with a nil buffer
+ * and result 0) for every jbd_buf still on the record's dirty queue.
+ *
+ * With `abort`, the last jbd_buf on the dirty queue, if any, decides
+ * what happens: the record is handed over to that jbd_buf's
+ * transaction and, unless `revoke` is set, the file system block is
+ * refilled from the journal copy at jbd_buf->jbd_lba, hooked up to
+ * jbd_trans_end_write() and marked dirty. Block get failures on this
+ * path are asserted.
+ */
+static void
+jbd_trans_finish_callback(struct jbd_journal *journal,
+			  const struct jbd_trans *trans,
+			  struct jbd_block_rec *block_rec,
+			  bool abort,
+			  bool revoke)
+{
+	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
+	if (block_rec->trans != trans)
+		return;
+
+	if (!abort) {
+		struct jbd_buf *jbd_buf, *tmp;
+		TAILQ_FOREACH_SAFE(jbd_buf,
+				&block_rec->dirty_buf_queue,
+				dirty_buf_node,
+				tmp) {
+			jbd_trans_end_write(fs->bdev->bc, nil, 0, jbd_buf);
+		}
+	} else {
+		/*
+		 * We have to roll back data if the block is going to be
+		 * aborted.
+		 */
+		struct jbd_buf *jbd_buf;
+		struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
+				  block = EXT4_BLOCK_ZERO();
+		jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
+				jbd_buf_dirty);
+		if (jbd_buf) {
+			if (!revoke) {
+				int r;
+				r = ext4_block_get_noread(fs->bdev,
+							&block,
+							block_rec->lba);
+				assert(r == 0);
+				r = jbd_block_get(journal->jbd_fs,
+							&jbd_block,
+							jbd_buf->jbd_lba);
+				assert(r == 0);
+				memcpy(block.data, jbd_block.data,
+						journal->block_size);
+
+				jbd_trans_change_ownership(block_rec,
+						jbd_buf->trans);
+
+				block.buf->end_write = jbd_trans_end_write;
+				block.buf->end_write_arg = jbd_buf;
+
+				ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
+				ext4_bcache_set_dirty(block.buf);
+
+				ext4_block_set(fs->bdev, &jbd_block); /* NOTE(review): journal block released via fs->bdev - confirm it matches the jbd device */
+				ext4_block_set(fs->bdev, &block);
+				return;
+			} else {
+				/* The revoked buffer is yet written. */
+				jbd_trans_change_ownership(block_rec,
+						jbd_buf->trans);
+			}
+		}
+	}
+}
+
+/* Unlink and free `block_rec`, but only while `trans` is still its
+ * owner; a record owned by anyone else is left untouched. */
+static inline void
+jbd_trans_remove_block_rec(struct jbd_journal *journal,
+			   struct jbd_block_rec *block_rec,
+			   struct jbd_trans *trans)
+{
+	if (block_rec->trans != trans)
+		return;
+
+	LIST_REMOVE(block_rec, tbrec_node);
+	RB_REMOVE(jbd_block, &journal->block_rec_root, block_rec);
+	ext4_free(block_rec);
+}
+
+/**@brief  Put a block under control of a transaction and mark it dirty.
+ *
+ *         Nothing is done when the block's buffer is already tracked by
+ *         this same transaction. Otherwise a tracking record is created,
+ *         queued on both the block record and the transaction, the
+ *         buffer's write-completion hook is pointed at
+ *         jbd_trans_end_write(), and any pending revocation of the block
+ *         within this transaction is dropped.
+ * @param  trans transaction
+ * @param  block block descriptor
+ * @return 0 on success, -1 with the error string set to Enomem when an
+ *         allocation fails*/
+int jbd_trans_set_block_dirty(struct jbd_trans *trans,
+			      struct ext4_block *block)
+{
+	struct jbd_revoke_rec *stale, probe = {
+		.lba = block->lb_id
+	};
+	struct jbd_block_rec *owner;
+	struct jbd_buf *entry;
+
+	/* Already hooked up for this very transaction? */
+	if (block->buf->end_write == jbd_trans_end_write) {
+		entry = block->buf->end_write_arg;
+		if (entry != nil && entry->trans == trans)
+			return 0;
+	}
+
+	entry = ext4_calloc(1, sizeof(struct jbd_buf));
+	if (entry == nil) {
+		werrstr(Enomem);
+		return -1;
+	}
+
+	owner = jbd_trans_insert_block_rec(trans, block->lb_id);
+	if (owner == nil) {
+		ext4_free(entry);
+		werrstr(Enomem);
+		return -1;
+	}
+
+	TAILQ_INSERT_TAIL(&owner->dirty_buf_queue,
+			entry,
+			dirty_buf_node);
+
+	entry->trans = trans;
+	entry->block_rec = owner;
+	entry->block = *block;
+	ext4_bcache_inc_ref(block->buf);
+
+	/* Ask to be told when the content reaches the disk so that a
+	 * checkpoint can be made. */
+	block->buf->end_write_arg = entry;
+	block->buf->end_write = jbd_trans_end_write;
+
+	TAILQ_INSERT_HEAD(&trans->buf_queue, entry, buf_node);
+	trans->data_cnt++;
+
+	ext4_bcache_set_dirty(block->buf);
+
+	/* A block that is written again must no longer be revoked by
+	 * this transaction. */
+	stale = RB_FIND(jbd_revoke_tree,
+			&trans->revoke_root,
+			&probe);
+	if (stale != nil) {
+		RB_REMOVE(jbd_revoke_tree, &trans->revoke_root,
+			  stale);
+		ext4_free(stale);
+	}
+
+	return 0;
+}
+
+/**@brief  Register a block as revoked within a transaction.
+ *
+ *         A block that is already in the transaction's revoke tree is
+ *         left alone.
+ * @param  trans transaction
+ * @param  lba logical block address
+ * @return 0 on success, -1 with the error string set to Enomem when
+ *         the record cannot be allocated*/
+int jbd_trans_revoke_block(struct jbd_trans *trans,
+			   ext4_fsblk_t lba)
+{
+	struct jbd_revoke_rec probe = {
+		.lba = lba
+	};
+	struct jbd_revoke_rec *fresh;
+
+	if (RB_FIND(jbd_revoke_tree, &trans->revoke_root, &probe) != nil)
+		return 0;
+
+	fresh = ext4_calloc(1, sizeof(struct jbd_revoke_rec));
+	if (fresh == nil) {
+		werrstr(Enomem);
+		return -1;
+	}
+
+	fresh->lba = lba;
+	RB_INSERT(jbd_revoke_tree, &trans->revoke_root, fresh);
+	return 0;
+}
+
+/**@brief  Try to add block to be revoked to a transaction.
+ *         If @lba still remains in an transaction on checkpoint
+ *         queue, add @lba as a revoked block to the transaction.
+ *
+ *         Nothing is revoked when no block record exists for @lba, or
+ *         when the record belongs to @trans and its dirty queue holds
+ *         at most one buffer. A failure to record the revocation is
+ *         reported to the caller instead of being dropped.
+ * @param  trans transaction
+ * @param  lba logical block address
+ * @return 0 when nothing had to be done or the block was revoked,
+ *         otherwise the error returned by jbd_trans_revoke_block()*/
+int jbd_trans_try_revoke_block(struct jbd_trans *trans,
+			       ext4_fsblk_t lba)
+{
+	struct jbd_journal *journal = trans->journal;
+	struct jbd_block_rec *block_rec =
+		jbd_trans_block_rec_lookup(journal, lba);
+
+	if (block_rec == nil)
+		return 0;
+
+	if (block_rec->trans == trans) {
+		struct jbd_buf *jbd_buf =
+			TAILQ_LAST(&block_rec->dirty_buf_queue,
+				jbd_buf_dirty);
+		/* First and last entry coincide: there are no other
+		 * unwritten buffers, so no revocation is needed. */
+		if (TAILQ_FIRST(&block_rec->dirty_buf_queue) == jbd_buf)
+			return 0;
+	}
+
+	return jbd_trans_revoke_block(trans, lba);
+}
+
+/**@brief  Free a transaction
+ *
+ *         Releases every jbd_buf queued on the transaction, every record
+ *         in its revoke tree, every block record it still owns, and
+ *         finally the transaction itself. With `abort` set, each
+ *         buffer's write-completion hook and dirty state are cleared and
+ *         its block reference is dropped before the jbd_buf is freed.
+ *         This function does not return a value.
+ * @param  journal current journal session
+ * @param  trans transaction
+ * @param  abort discard all the modifications on the block?*/
+void jbd_journal_free_trans(struct jbd_journal *journal,
+			    struct jbd_trans *trans,
+			    bool abort)
+{
+	struct jbd_buf *jbd_buf, *tmp;
+	struct jbd_revoke_rec *rec, *tmp2;
+	struct jbd_block_rec *block_rec, *tmp3;
+	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
+	TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
+			  tmp) {
+		block_rec = jbd_buf->block_rec;
+		if (abort) {
+			jbd_buf->block.buf->end_write = nil;
+			jbd_buf->block.buf->end_write_arg = nil;
+			ext4_bcache_clear_dirty(jbd_buf->block.buf);
+			ext4_block_set(fs->bdev, &jbd_buf->block);
+		}
+
+		TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
+			jbd_buf,
+			dirty_buf_node);
+		jbd_trans_finish_callback(journal,
+				trans,
+				block_rec,
+				abort,
+				false);
+		TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
+		ext4_free(jbd_buf);
+	}
+	RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
+			  tmp2) {
+		RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, rec);
+		ext4_free(rec);
+	}
+	LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
+			  tmp3) {
+		jbd_trans_remove_block_rec(journal, block_rec, trans); /* frees only records still owned by trans */
+	}
+
+	ext4_free(trans);
+}
+
+/**@brief  Write commit block for a transaction
+ *
+ *         Allocates one journal block, fills in a commit header carrying
+ *         the transaction id (plus the data checksum when the feature
+ *         test below succeeds), sets the commit checksum, and releases
+ *         the block marked dirty and BC_TMP.
+ * @param  trans transaction
+ * @return standard error code*/
+static int jbd_trans_write_commit_block(struct jbd_trans *trans)
+{
+	int rc;
+	struct ext4_block block;
+	struct jbd_commit_header *header;
+	u32int commit_iblock;
+	struct jbd_journal *journal = trans->journal;
+
+	commit_iblock = jbd_journal_alloc_block(journal, trans);
+
+	rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
+	if (rc != 0)
+		return rc;
+
+	header = (struct jbd_commit_header *)block.data;
+	jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
+	jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
+	jbd_set32(&header->header, sequence, trans->trans_id);
+
+	if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
+				JBD_FEATURE_COMPAT_CHECKSUM)) { /* NOTE(review): a COMPAT-named flag is tested with the INCOMPAT accessor - confirm this is intended */
+		header->chksum_type = JBD_CRC32_CHKSUM;
+		header->chksum_size = JBD_CRC32_CHKSUM_SIZE;
+		jbd_set32(header, chksum[0], trans->data_csum);
+	}
+	jbd_commit_csum_set(journal->jbd_fs, header);
+	ext4_bcache_set_dirty(block.buf);
+	ext4_bcache_set_flag(block.buf, BC_TMP);
+	rc = jbd_block_set(journal->jbd_fs, &block);
+	return rc;
+}
+
+/**@brief  Write descriptor block for a transaction
+ *
+ *         First drops buffers that are no longer dirty from the
+ *         transaction (decrementing data_cnt). Then, for each remaining
+ *         buffer, a tag is appended to the current descriptor block and
+ *         the buffer's data is copied into a freshly allocated journal
+ *         block. The current descriptor block is finished and a new one
+ *         started whenever jbd_write_block_tag() returns non-zero. Data
+ *         whose leading word equals the journal magic number is escaped
+ *         by zeroing that word in the journal copy. On success the
+ *         running checksum of the logged data is stored in
+ *         trans->data_csum.
+ * @param  journal current journal session
+ * @param  trans transaction
+ * @return standard error code*/
+static int jbd_journal_prepare(struct jbd_journal *journal,
+			       struct jbd_trans *trans)
+{
+	int rc = 0, i = 0;
+	struct ext4_block desc_block = EXT4_BLOCK_ZERO(),
+			  data_block = EXT4_BLOCK_ZERO();
+	s32int tag_tbl_size = 0;
+	u32int desc_iblock = 0;
+	u32int data_iblock;
+	char *tag_start, *tag_ptr = nil;
+	struct jbd_buf *jbd_buf, *tmp;
+	struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
+	u32int checksum = EXT4_CRC32_INIT;
+	struct jbd_bhdr *bhdr = nil;
+	void *data;
+
+	/* Try to remove any non-dirty buffers from the tail of
+	 * buf_queue. */
+	TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
+			jbd_trans_buf, buf_node, tmp) {
+		struct jbd_revoke_rec tmp_rec = {
+			.lba = jbd_buf->block_rec->lba
+		};
+		/* We stop the iteration when we find a dirty buffer. */
+		if (ext4_bcache_test_flag(jbd_buf->block.buf,
+					BC_DIRTY))
+			break;
+	
+		TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
+			jbd_buf,
+			dirty_buf_node);
+
+		jbd_buf->block.buf->end_write = nil;
+		jbd_buf->block.buf->end_write_arg = nil;
+		jbd_trans_finish_callback(journal,
+				trans,
+				jbd_buf->block_rec,
+				true,
+				RB_FIND(jbd_revoke_tree,
+					&trans->revoke_root,
+					&tmp_rec) != nil);
+		jbd_trans_remove_block_rec(journal,
+					jbd_buf->block_rec, trans);
+		trans->data_cnt--;
+
+		ext4_block_set(fs->bdev, &jbd_buf->block);
+		TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
+		ext4_free(jbd_buf);
+	}
+
+	TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
+		struct tag_info tag_info;
+		bool uuid_exist = false;
+		bool is_escape = false;
+		struct jbd_revoke_rec tmp_rec = {
+			.lba = jbd_buf->block_rec->lba
+		};
+		if (!ext4_bcache_test_flag(jbd_buf->block.buf,
+					   BC_DIRTY)) {
+			TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
+					jbd_buf,
+					dirty_buf_node);
+
+			jbd_buf->block.buf->end_write = nil;
+			jbd_buf->block.buf->end_write_arg = nil;
+
+			/* The buffer has not been modified, just release
+			 * that jbd_buf. */
+			jbd_trans_finish_callback(journal,
+					trans,
+					jbd_buf->block_rec,
+					true,
+					RB_FIND(jbd_revoke_tree,
+						&trans->revoke_root,
+						&tmp_rec) != nil);
+			jbd_trans_remove_block_rec(journal,
+					jbd_buf->block_rec, trans);
+			trans->data_cnt--;
+
+			ext4_block_set(fs->bdev, &jbd_buf->block);
+			TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
+			ext4_free(jbd_buf);
+			continue;
+		}
+		checksum = jbd_block_csum(journal->jbd_fs,
+					  jbd_buf->block.data,
+					  checksum,
+					  trans->trans_id);
+		if (((struct jbd_bhdr *)jbd_buf->block.data)->magic ==
+				to_be32(JBD_MAGIC_NUMBER))
+			is_escape = true; /* data starts with the journal magic: must be escaped in the log */
+
+again:
+		if (!desc_iblock) {
+			desc_iblock = jbd_journal_alloc_block(journal, trans);
+			rc = jbd_block_get_noread(journal->jbd_fs, &desc_block, desc_iblock);
+			if (rc != 0)
+				break;
+
+			bhdr = (struct jbd_bhdr *)desc_block.data;
+			jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
+			jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
+			jbd_set32(bhdr, sequence, trans->trans_id);
+
+			tag_start = (char *)(bhdr + 1);
+			tag_ptr = tag_start;
+			uuid_exist = true; /* the first tag of each descriptor block carries the uuid */
+			tag_tbl_size = journal->block_size -
+				sizeof(struct jbd_bhdr);
+
+			if (jbd_has_csum(&journal->jbd_fs->sb))
+				tag_tbl_size -= sizeof(struct jbd_block_tail);
+
+			if (!trans->start_iblock)
+				trans->start_iblock = desc_iblock;
+
+			ext4_bcache_set_dirty(desc_block.buf);
+			ext4_bcache_set_flag(desc_block.buf, BC_TMP);
+		}
+		tag_info.block = jbd_buf->block.lb_id;
+		tag_info.uuid_exist = uuid_exist;
+		tag_info.is_escape = is_escape;
+		if (i == trans->data_cnt - 1)
+			tag_info.last_tag = true;
+		else
+			tag_info.last_tag = false;
+
+		tag_info.checksum = checksum;
+
+		if (uuid_exist)
+			memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
+					UUID_SIZE);
+
+		rc = jbd_write_block_tag(journal->jbd_fs,
+				tag_ptr,
+				tag_tbl_size,
+				&tag_info);
+		if (rc != 0) {
+			jbd_meta_csum_set(journal->jbd_fs, bhdr);
+			desc_iblock = 0; /* forces a new descriptor block at the again label */
+			rc = jbd_block_set(journal->jbd_fs, &desc_block);
+			if (rc != 0)
+				break;
+
+			goto again;
+		}
+
+		data_iblock = jbd_journal_alloc_block(journal, trans);
+		rc = jbd_block_get_noread(journal->jbd_fs, &data_block, data_iblock);
+		if (rc != 0) {
+			desc_iblock = 0;
+			ext4_bcache_clear_dirty(desc_block.buf);
+			jbd_block_set(journal->jbd_fs, &desc_block);
+			break;
+		}
+
+		data = data_block.data;
+		memcpy(data, jbd_buf->block.data,
+			journal->block_size);
+		if (is_escape)
+			((struct jbd_bhdr *)data)->magic = 0;
+
+		ext4_bcache_set_dirty(data_block.buf);
+		ext4_bcache_set_flag(data_block.buf, BC_TMP);
+		rc = jbd_block_set(journal->jbd_fs, &data_block);
+		if (rc != 0) {
+			desc_iblock = 0;
+			ext4_bcache_clear_dirty(desc_block.buf);
+			jbd_block_set(journal->jbd_fs, &desc_block);
+			break;
+		}
+		jbd_buf->jbd_lba = data_iblock; /* remember where the journal copy of this buffer lives */
+
+		tag_ptr += tag_info.tag_bytes;
+		tag_tbl_size -= tag_info.tag_bytes;
+
+		i++;
+	}
+	if (rc == 0 && desc_iblock) {
+		jbd_meta_csum_set(journal->jbd_fs,
+				(struct jbd_bhdr *)bhdr);
+		trans->data_csum = checksum;
+		rc = jbd_block_set(journal->jbd_fs, &desc_block);
+	}
+
+	return rc;
+}
+
+/**@brief  Write revoke block for a transaction
+ *
+ *         Emits the transaction's revoke tree as one or more revoke
+ *         blocks. Each block holds a revoke header followed by 4 or 8
+ *         byte big-endian block addresses (8 when the 64bit journal
+ *         feature is set). A new block is started when the current one
+ *         has no room for another record.
+ *
+ *         The header's count field is the number of bytes used from the
+ *         start of the block, i.e. header plus records. It is derived
+ *         from the write cursor so that the space reserved for the
+ *         checksum tail is never counted as record data.
+ * @param  journal current journal session
+ * @param  trans transaction
+ * @return standard error code*/
+static int
+jbd_journal_prepare_revoke(struct jbd_journal *journal,
+			   struct jbd_trans *trans)
+{
+	int rc = 0;
+	struct ext4_block desc_block = EXT4_BLOCK_ZERO();
+	s32int tag_tbl_size = 0;
+	u32int desc_iblock = 0;
+	char *blocks_entry = nil;
+	struct jbd_revoke_rec *rec, *tmp;
+	struct jbd_revoke_header *header = nil;
+	s32int record_len = 4;
+	struct jbd_bhdr *bhdr = nil;
+
+	if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
+				     JBD_FEATURE_INCOMPAT_64BIT))
+		record_len = 8;
+
+	RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
+			  tmp) {
+again:
+		if (!desc_iblock) {
+			desc_iblock = jbd_journal_alloc_block(journal, trans);
+			rc = jbd_block_get_noread(journal->jbd_fs, &desc_block,
+						  desc_iblock);
+			if (rc != 0)
+				break;
+
+			bhdr = (struct jbd_bhdr *)desc_block.data;
+			jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
+			jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
+			jbd_set32(bhdr, sequence, trans->trans_id);
+
+			header = (struct jbd_revoke_header *)bhdr;
+			blocks_entry = (char *)(header + 1);
+			tag_tbl_size = journal->block_size -
+				sizeof(struct jbd_revoke_header);
+
+			/* Keep the end of the block free for the
+			 * checksum tail. */
+			if (jbd_has_csum(&journal->jbd_fs->sb))
+				tag_tbl_size -= sizeof(struct jbd_block_tail);
+
+			if (!trans->start_iblock)
+				trans->start_iblock = desc_iblock;
+
+			ext4_bcache_set_dirty(desc_block.buf);
+			ext4_bcache_set_flag(desc_block.buf, BC_TMP);
+		}
+
+		if (tag_tbl_size < record_len) {
+			/* Block is full: finish it and start another
+			 * one for the current record. */
+			jbd_set32(header, count,
+				  (u32int)(blocks_entry - (char *)header));
+			jbd_meta_csum_set(journal->jbd_fs, bhdr);
+			bhdr = nil;
+			desc_iblock = 0;
+			header = nil;
+			rc = jbd_block_set(journal->jbd_fs, &desc_block);
+			if (rc != 0)
+				break;
+
+			goto again;
+		}
+		if (record_len == 8) {
+			u64int *blocks =
+				(u64int *)blocks_entry;
+			*blocks = to_be64(rec->lba);
+		} else {
+			u32int *blocks =
+				(u32int *)blocks_entry;
+			*blocks = to_be32((u32int)rec->lba);
+		}
+		blocks_entry += record_len;
+		tag_tbl_size -= record_len;
+	}
+	if (rc == 0 && desc_iblock) {
+		if (header != nil)
+			jbd_set32(header, count,
+				  (u32int)(blocks_entry - (char *)header));
+
+		jbd_meta_csum_set(journal->jbd_fs, bhdr);
+		rc = jbd_block_set(journal->jbd_fs, &desc_block);
+	}
+
+	return rc;
+}
+
+/**@brief  Release the block reference held by each buffer queued in a transaction.
+ * @param  journal current journal session
+ * @param  trans transaction whose buf_queue is walked*/
+void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
+{
+	struct ext4_fs *owner = journal->jbd_fs->inode_ref.fs;
+	struct jbd_buf *cur, *next;
+
+	TAILQ_FOREACH_SAFE(cur, &trans->buf_queue, buf_node, next) {
+		struct ext4_block held = cur->block;	/* ext4_block_set() gets a copy */
+		ext4_block_set(owner->bdev, &held);
+	}
+}
+
+/**@brief  Detach a written buffer from its transaction; once every data block of the first
+ *         transaction on the checkpoint queue is written, move the journal start past it.*/
+static void jbd_trans_end_write(struct ext4_bcache *bc,
+			  struct ext4_buf *buf,
+			  int res,
+			  void *arg)
+{
+	struct jbd_buf *jbd_buf = arg;
+	struct jbd_trans *trans = jbd_buf->trans;
+	struct jbd_block_rec *block_rec = jbd_buf->block_rec;
+	struct jbd_journal *journal = trans->journal;
+	bool first_in_queue =
+		trans == TAILQ_FIRST(&journal->cp_queue);	/* sampled now; trans may be freed below */
+	if (res != 0)
+		trans->error = res;	/* record the failed write on the transaction */
+
+	USED(bc);
+	TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
+	TAILQ_REMOVE(&block_rec->dirty_buf_queue,
+			jbd_buf,
+			dirty_buf_node);
+
+	jbd_trans_finish_callback(journal,
+			trans,
+			jbd_buf->block_rec,
+			false,
+			false);
+	if (block_rec->trans == trans && buf) {	/* buf may be nil, see the caller in the commit path */
+		/* Clear the end_write and end_write_arg fields. */
+		buf->end_write = nil;
+		buf->end_write_arg = nil;
+	}
+
+	ext4_free(jbd_buf);
+
+	trans->written_cnt++;
+	if (trans->written_cnt == trans->data_cnt) {	/* every data block of trans is written */
+		/* If it is the first transaction on checkpoint queue,
+		 * we will shift the start of the journal to the next
+		 * transaction, and remove subsequent written
+		 * transactions from checkpoint queue until we find
+		 * an unwritten one. */
+		if (first_in_queue) {
+			journal->start = trans->start_iblock +
+				trans->alloc_blocks;
+			wrap(&journal->jbd_fs->sb, journal->start);
+			journal->trans_id = trans->trans_id + 1;
+			TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
+			jbd_journal_free_trans(journal, trans, false);	/* trans must not be used after this */
+
+			jbd_journal_purge_cp_trans(journal, false, false);
+			jbd_journal_write_sb(journal);
+			jbd_write_sb(journal->jbd_fs);
+		}
+	}
+}
+
+/**@brief  Commit a transaction to the journal immediately and queue it for checkpointing.
+ * @param  journal current journal session
+ * @param  trans transaction; freed here on failure and on some success paths
+ * @return standard error code*/
+static int __jbd_journal_commit_trans(struct jbd_journal *journal,
+				      struct jbd_trans *trans)
+{
+	int rc;
+	u32int last = journal->last;	/* restored at Finish if the commit fails */
+	struct jbd_revoke_rec *rec, *tmp;
+
+	trans->trans_id = journal->alloc_trans_id;
+	rc = jbd_journal_prepare(journal, trans);
+	if (rc != 0)
+		goto Finish;
+
+	rc = jbd_journal_prepare_revoke(journal, trans);
+	if (rc != 0)
+		goto Finish;
+
+	if (TAILQ_EMPTY(&trans->buf_queue) &&
+	    RB_EMPTY(&trans->revoke_root)) {
+		/* Since there are no entries in both buffer list
+		 * and revoke entry list, we do not consider trans as
+		 * complete transaction and just return 0.*/
+		jbd_journal_free_trans(journal, trans, false);
+		goto Finish;	/* rc is 0 here, so Finish skips the error cleanup */
+	}
+
+	rc = jbd_trans_write_commit_block(trans);
+	if (rc != 0)
+		goto Finish;
+
+	journal->alloc_trans_id++;	/* the id is consumed only once the commit block is written */
+
+	/* Complete the checkpoint of buffers which are revoked. */
+	RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
+			tmp) {
+		struct jbd_block_rec *block_rec =
+			jbd_trans_block_rec_lookup(journal, rec->lba);
+		struct jbd_buf *jbd_buf = nil;
+		if (block_rec)
+			jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
+					jbd_buf_dirty);
+		if (jbd_buf) {
+			struct ext4_buf *buf;
+			struct ext4_block block = EXT4_BLOCK_ZERO();
+			/*
+			 * We do this to reset the ext4_buf::end_write and
+			 * ext4_buf::end_write_arg fields so that the checkpoint
+			 * callback won't be triggered again.
+			 */
+			buf = ext4_bcache_find_get(journal->jbd_fs->bdev->bc,
+					&block,
+					jbd_buf->block_rec->lba);
+			jbd_trans_end_write(journal->jbd_fs->bdev->bc, buf, 0, jbd_buf);
+			if (buf)
+				ext4_block_set(journal->jbd_fs->bdev, &block);
+		}
+	}
+
+	if (TAILQ_EMPTY(&journal->cp_queue)) {
+		/*
+		 * This transaction is going to be the first object in the
+		 * checkpoint queue.
+		 * When the first transaction in checkpoint queue is completely
+		 * written to disk, we shift the tail of the log to right.
+		 */
+		if (trans->data_cnt) {
+			journal->start = trans->start_iblock;
+			wrap(&journal->jbd_fs->sb, journal->start);
+			journal->trans_id = trans->trans_id;
+			jbd_journal_write_sb(journal);
+			jbd_write_sb(journal->jbd_fs);
+			TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
+					trans_node);
+			jbd_journal_cp_trans(journal, trans);
+		} else {
+			/* nothing to checkpoint: the journal start moves past this transaction at once */
+			journal->start = trans->start_iblock +
+				trans->alloc_blocks;
+			wrap(&journal->jbd_fs->sb, journal->start);
+			journal->trans_id = trans->trans_id + 1;
+			jbd_journal_write_sb(journal);
+			jbd_journal_free_trans(journal, trans, false);
+		}
+	} else {
+		/* No need to do anything to the JBD superblock. */
+		TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
+				trans_node);
+		if (trans->data_cnt)
+			jbd_journal_cp_trans(journal, trans);
+	}
+Finish:
+	if (rc != 0) {
+		journal->last = last;
+		jbd_journal_free_trans(journal, trans, true);
+	}
+	return rc;
+}
+
+/**@brief  Create a zero-initialized transaction bound to a journal
+ * @param  journal current journal session
+ * @return the new transaction, or nil when the allocation fails*/
+struct jbd_trans *
+jbd_journal_new_trans(struct jbd_journal *journal)
+{
+	struct jbd_trans *t = ext4_calloc(1, sizeof(struct jbd_trans));
+
+	if (t != nil) {
+		/* trans_id stays unset here; it is assigned
+		 * when the transaction is committed. */
+		t->journal = journal;
+		t->data_csum = EXT4_CRC32_INIT;
+		t->error = 0;
+		TAILQ_INIT(&t->buf_queue);
+	}
+
+	return t;
+}
+
+/**@brief  Public entry point: commit a transaction to the journal immediately.
+ * @param  journal current journal session
+ * @param  trans transaction
+ * @return standard error code*/
+int jbd_journal_commit_trans(struct jbd_journal *journal, struct jbd_trans *trans)
+{
+	int rc = __jbd_journal_commit_trans(journal, trans);
+	return rc;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_mbr.c
@@ -1,0 +1,165 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_mbr.h"
+
+#define MBR_SIGNATURE 0xAA55
+
+#pragma pack on
+
+struct ext4_part_entry {
+	u8int status;	/* ext4_mbr_write() stores 0 */
+	u8int chs1[3];	/* CHS bytes derived from the first cylinder of the partition */
+	u8int type;	/* ext4_mbr_scan() accepts only 0x83 (linux native) */
+	u8int chs2[3];	/* CHS bytes derived from the last cylinder of the partition */
+	u32int first_lba;	/* partition start; scan multiplies it by ph_bsize */
+	u32int sectors;	/* partition length; scan skips entries where this is 0 */
+};
+
+struct ext4_mbr {
+	u8int bootstrap[440];	/* boot code, bytes 0..439 */
+	u32int disk_id;		/* disk signature at its standard offset 440 */
+	u16int reserved;	/* bytes 444..445, left zero */
+	struct ext4_part_entry part_entry[4];	/* partition table, offset 446 */
+	u16int signature;	/* MBR_SIGNATURE, offset 510 */
+};
+#pragma pack off
+
+/* Read the MBR of parent and describe every Linux (type 0x83) partition in bdevs;
+ * other slots stay zeroed.  On-disk fields are little-endian and are converted
+ * with to_le16()/to_le32().  Returns 0, the error of the failing block call, or
+ * -1 with errstr set to Enotfound when the boot signature does not match. */
+int ext4_mbr_scan(struct ext4_blockdev *parent, struct ext4_mbr_bdevs *bdevs)
+{
+	int r;
+	usize i;
+
+	ext4_dbg(DEBUG_MBR, DBG_INFO "ext4_mbr_scan\n");
+	memset(bdevs, 0, sizeof(struct ext4_mbr_bdevs));
+	r = ext4_block_init(parent);
+	if (r != 0)
+		return r;
+
+	r = ext4_block_readbytes(parent, 0, parent->bdif->ph_bbuf, 512);
+	if (r != 0)
+		goto blockdev_fini;
+
+	const struct ext4_mbr *mbr = (void *)parent->bdif->ph_bbuf;
+	if (to_le16(mbr->signature) != MBR_SIGNATURE) {
+		ext4_dbg(DEBUG_MBR, DBG_ERROR "ext4_mbr_scan: unknown "
+			 "signature: 0x%x\n", to_le16(mbr->signature));
+		werrstr(Enotfound);
+		r = -1;
+		goto blockdev_fini;
+	}
+
+	/*Show bootstrap code*/
+	ext4_dbg(DEBUG_MBR, "mbr_part: bootstrap:");
+	for (i = 0; i < sizeof(mbr->bootstrap); ++i) {
+		if (!(i & 0xF))
+				ext4_dbg(DEBUG_MBR | DEBUG_NOPREFIX, "\n");
+		ext4_dbg(DEBUG_MBR | DEBUG_NOPREFIX, "%02x, ", mbr->bootstrap[i]);
+	}
+
+	ext4_dbg(DEBUG_MBR | DEBUG_NOPREFIX, "\n\n");
+	for (i = 0; i < 4; ++i) {
+		const struct ext4_part_entry *pe = &mbr->part_entry[i];
+		ext4_dbg(DEBUG_MBR, "mbr_part: %d\n", (int)i);
+		ext4_dbg(DEBUG_MBR, "\tstatus: 0x%x\n", pe->status);
+		ext4_dbg(DEBUG_MBR, "\ttype 0x%x:\n", pe->type);
+		ext4_dbg(DEBUG_MBR, "\tfirst_lba: 0x%ux\n", to_le32(pe->first_lba));
+		ext4_dbg(DEBUG_MBR, "\tsectors: 0x%ux\n", to_le32(pe->sectors));
+
+		/* Skip empty entries and anything that is not 0x83 (linux native). */
+		if (!pe->sectors || pe->type != 0x83)
+			continue;
+
+		bdevs->partitions[i].bdif = parent->bdif;
+		bdevs->partitions[i].part_offset =
+			(u64int)to_le32(pe->first_lba) * parent->bdif->ph_bsize;
+		bdevs->partitions[i].part_size =
+			(u64int)to_le32(pe->sectors) * parent->bdif->ph_bsize;
+	}
+
+	blockdev_fini:
+	ext4_block_fini(parent);
+	return r;
+}
+
+/* Write a new MBR that splits parent into up to four type 0x83 partitions,
+ * parts->division[i] giving each one's share of the cylinders in percent.
+ * Geometry is computed in sectors of ph_bsize bytes, the unit ext4_mbr_scan()
+ * applies to first_lba and sectors; multi-byte fields are stored little-endian.
+ * Returns 0, a block layer error, or -1 (errstr Einval) if the shares exceed 100. */
+int ext4_mbr_write(struct ext4_blockdev *parent, struct ext4_mbr_parts *parts, u32int disk_id)
+{
+	int r;
+	u64int disk_sectors;
+	u32int division_sum = parts->division[0] + parts->division[1] +
+				parts->division[2] + parts->division[3];
+
+	if (division_sum > 100) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	ext4_dbg(DEBUG_MBR, DBG_INFO "ext4_mbr_write\n");
+	r = ext4_block_init(parent);
+	if (r != 0)
+		return r;
+
+	disk_sectors = parent->part_size / parent->bdif->ph_bsize;
+
+	/*Calculate CHS*/
+	u32int k = 16;
+	while ((k < 256) && ((disk_sectors / k / 63) > 1024))
+		k *= 2;
+
+	if (k == 256)
+		--k;
+
+	const u32int cyl_sectors = 63 * k;
+	const u32int cyl_count = disk_sectors / cyl_sectors;
+
+	struct ext4_mbr *mbr = (void *)parent->bdif->ph_bbuf;
+	memset(mbr, 0, sizeof(struct ext4_mbr));
+	mbr->disk_id = to_le32(disk_id);
+
+	u32int cyl_it = 0;
+	for (int i = 0; i < 4; ++i) {
+		u32int cyl_part = cyl_count * parts->division[i] / 100;
+		if (!cyl_part)
+			continue;
+
+		u32int part_start = cyl_it * cyl_sectors;
+		u32int part_sectors = cyl_part * cyl_sectors;
+
+		if (i == 0) {
+			/* partition 0 starts after the first 63 sectors, which hold the MBR */
+			part_start += 63;
+			part_sectors -= 63;
+		}
+
+		u32int cyl_end = cyl_part + cyl_it - 1;
+		mbr->part_entry[i].status = 0;
+		mbr->part_entry[i].chs1[0] = i ? 0 : 1;
+		mbr->part_entry[i].chs1[1] = ((cyl_it >> 2) & 0xC0) + 1;
+		mbr->part_entry[i].chs1[2] = cyl_it & 0xFF;
+		mbr->part_entry[i].type = 0x83;
+		mbr->part_entry[i].chs2[0] = k - 1;
+		mbr->part_entry[i].chs2[1] = ((cyl_end >> 2) & 0xC0) + 63;
+		mbr->part_entry[i].chs2[2] = cyl_end & 0xFF;
+
+		mbr->part_entry[i].first_lba = to_le32(part_start);
+		mbr->part_entry[i].sectors = to_le32(part_sectors);
+
+		cyl_it += cyl_part;
+	}
+
+	mbr->signature = to_le16(MBR_SIGNATURE);
+	r = ext4_block_writebytes(parent, 0, parent->bdif->ph_bbuf, 512);
+
+	ext4_block_fini(parent);
+	return r;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_mkfs.c
@@ -1,0 +1,805 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_super.h"
+#include "ext4_block_group.h"
+#include "ext4_dir.h"
+#include "ext4_dir_idx.h"
+#include "ext4_fs.h"
+#include "ext4_inode.h"
+#include "ext4_ialloc.h"
+#include "ext4_mkfs.h"
+
+static inline int log_2(int j)
+{
+	int bits = 0;
+	while (j > 0) {
+		j >>= 1;
+		bits++;
+	}
+	return bits - 1;	/* -1 for j <= 0, otherwise the index of the highest set bit */
+}
+
+/* Translate an on-disk superblock into mkfs parameters.
+ * Returns 0, or -1 (errstr set) when the magic number does not match. */
+static int sb2info(struct ext4_sblock *sb, struct ext4_mkfs_info *info)
+{
+	if (to_le16(sb->magic) == EXT4_SUPERBLOCK_MAGIC) {
+		info->block_size = 1024 << to_le32(sb->log_block_size);
+		info->len = (u64int)info->block_size * ext4_sb_get_blocks_cnt(sb);
+		info->blocks_per_group = to_le32(sb->blocks_per_group);
+		info->bg_desc_reserve_blocks = to_le16(sb->s_reserved_gdt_blocks);
+		info->dsc_size = to_le16(sb->desc_size);
+		info->inodes = to_le32(sb->inodes_count);
+		info->inodes_per_group = to_le32(sb->inodes_per_group);
+		info->inode_size = to_le16(sb->inode_size);
+		info->feat_compat = to_le32(sb->features_compatible);
+		info->feat_incompat = to_le32(sb->features_incompatible);
+		info->feat_ro_compat = to_le32(sb->features_read_only);
+		memcpy(info->uuid, sb->uuid, UUID_SIZE);
+		strncpy(info->label,sb->volume_name,sizeof(info->label));
+		return 0;
+	}
+	werrstr("invalid superblock magic");
+	return -1;
+}
+/* Default blocks per group, used by ext4_mkfs() when none is given: block size times 8. */
+static u32int compute_blocks_per_group(struct ext4_mkfs_info *info)
+{
+	return info->block_size * 8;
+}
+/* Default inode count, used by ext4_mkfs() when none is given: a quarter of the block count. */
+static u32int compute_inodes(struct ext4_mkfs_info *info)
+{
+	return (u32int)EXT4_DIV_ROUND_UP(info->len, info->block_size) / 4;
+}
+
+/* Inodes per group, aligned to whole inode-table blocks; also rewrites info->inodes. */
+static u32int compute_inodes_per_group(struct ext4_mkfs_info *info)
+{
+	u32int nblocks = (u32int)EXT4_DIV_ROUND_UP(info->len, info->block_size);
+	u32int ngroups = EXT4_DIV_ROUND_UP(nblocks, info->blocks_per_group);
+	u32int per_group = EXT4_DIV_ROUND_UP(info->inodes, ngroups);
+
+	/* align to the number of inodes that fit in one block */
+	per_group = EXT4_ALIGN(per_group, (info->block_size / info->inode_size));
+	/* keep the total consistent with the rounded per-group count */
+	info->inodes = per_group * ngroups;
+
+	return per_group;
+}
+
+
+/* Default journal length in blocks: 1/64 of the volume's block count,
+ * clamped to the range 1024..32768. */
+static u32int compute_journal_blocks(struct ext4_mkfs_info *info)
+{
+	u32int n = (u32int)EXT4_DIV_ROUND_UP(info->len, info->block_size) / 64;
+
+	if (n < 1024)
+		return 1024;
+	return n > 32768 ? 32768 : n;
+}
+
+/* Without SPARSE_SUPER every group has a superblock copy; with it, ext4_sb_sparse() decides. */
+static bool has_superblock(struct ext4_mkfs_info *info, u32int bgid)
+{
+	bool sparse = (info->feat_ro_compat & EXT4_FRO_COM_SPARSE_SUPER) != 0;
+
+	return sparse ? ext4_sb_sparse(bgid) : true;
+}
+
+/* Derive the layout values of aux_info from info and allocate the zeroed
+ * superblock and group-descriptor buffers.  Returns 0, or -1 (errstr Enomem);
+ * buffers allocated before a failure are left for release_fs_aux_info(). */
+int create_fs_aux_info(struct fs_aux_info *aux_info,
+			      struct ext4_mkfs_info *info)
+{
+	aux_info->first_data_block = (info->block_size > 1024) ? 0 : 1;
+	aux_info->len_blocks = info->len / info->block_size;
+	aux_info->inode_table_blocks = EXT4_DIV_ROUND_UP(info->inodes_per_group *
+			info->inode_size, info->block_size);
+	aux_info->groups = (u32int)EXT4_DIV_ROUND_UP(aux_info->len_blocks -
+			aux_info->first_data_block, info->blocks_per_group);
+	aux_info->blocks_per_ind = info->block_size / sizeof(u32int);
+	aux_info->blocks_per_dind =
+			aux_info->blocks_per_ind * aux_info->blocks_per_ind;
+	/* each further level of indirection multiplies by blocks_per_ind */
+	aux_info->blocks_per_tind =
+			aux_info->blocks_per_dind * aux_info->blocks_per_ind;
+	aux_info->bg_desc_blocks =
+		EXT4_DIV_ROUND_UP(aux_info->groups * info->dsc_size,
+			info->block_size);
+
+	aux_info->default_i_flags = EXT4_INODE_FLAG_NOATIME;
+	/* Drop a trailing group that is too short to hold its own metadata. */
+	u32int last_group_size = aux_info->len_blocks % info->blocks_per_group;
+	u32int last_header_size = 2 + aux_info->inode_table_blocks;
+	if (has_superblock(info, aux_info->groups - 1))
+		last_header_size += 1 + aux_info->bg_desc_blocks +
+			info->bg_desc_reserve_blocks;
+
+	if (last_group_size > 0 && last_group_size < last_header_size) {
+		aux_info->groups--;
+		aux_info->len_blocks -= last_group_size;
+	}
+
+	aux_info->sb = ext4_calloc(1, EXT4_SUPERBLOCK_SIZE);
+	if (!aux_info->sb) {
+		werrstr(Enomem);
+		return -1;
+	}
+
+	aux_info->bg_desc_blk = ext4_calloc(1, info->block_size);
+	if (!aux_info->bg_desc_blk) {
+		werrstr(Enomem);
+		return -1;
+	}
+
+	aux_info->xattrs = nil;
+
+	ext4_dbg(DEBUG_MKFS, DBG_INFO "create_fs_aux_info\n");
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "first_data_block: %ud\n",
+			aux_info->first_data_block);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "len_blocks: %llud\n",
+			aux_info->len_blocks);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "inode_table_blocks: %ud\n",
+			aux_info->inode_table_blocks);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "groups: %ud\n",
+			aux_info->groups);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "bg_desc_blocks: %ud\n",
+			aux_info->bg_desc_blocks);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "default_i_flags: %ud\n",
+			aux_info->default_i_flags);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "blocks_per_ind: %ud\n",
+			aux_info->blocks_per_ind);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "blocks_per_dind: %ud\n",
+			aux_info->blocks_per_dind);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "blocks_per_tind: %ud\n",
+			aux_info->blocks_per_tind);
+	return 0;
+}
+/* Free the buffers allocated by create_fs_aux_info(); pointers that are nil are skipped. */
+void release_fs_aux_info(struct fs_aux_info *aux_info)
+{
+	if (aux_info->sb != nil)
+		ext4_free(aux_info->sb);
+	if (aux_info->bg_desc_blk != nil)
+		ext4_free(aux_info->bg_desc_blk);
+}
+
+
+/* Fill in the in-memory superblock (aux_info->sb) from the mkfs parameters and the computed layout */
+static void fill_sb(struct fs_aux_info *aux_info, struct ext4_mkfs_info *info)
+{
+	struct ext4_sblock *sb = aux_info->sb;
+
+	sb->inodes_count = to_le32(info->inodes_per_group * aux_info->groups);
+
+	ext4_sb_set_blocks_cnt(sb, aux_info->len_blocks);
+	ext4_sb_set_free_blocks_cnt(sb, aux_info->len_blocks);	/* write_bgroups() overwrites this with the summed count */
+	sb->free_inodes_count = to_le32(info->inodes_per_group * aux_info->groups);
+
+	sb->reserved_blocks_count_lo = to_le32(0);
+	sb->first_data_block = to_le32(aux_info->first_data_block);
+	sb->log_block_size = to_le32(log_2(info->block_size / 1024));
+	sb->log_cluster_size = to_le32(log_2(info->block_size / 1024));	/* same value as log_block_size */
+	sb->blocks_per_group = to_le32(info->blocks_per_group);
+	sb->frags_per_group = to_le32(info->blocks_per_group);
+	sb->inodes_per_group = to_le32(info->inodes_per_group);
+	sb->mount_time = to_le32(0);
+	sb->write_time = to_le32(0);
+	sb->mount_count = to_le16(0);
+	sb->max_mount_count = to_le16(0xFFFF);
+	sb->magic = to_le16(EXT4_SUPERBLOCK_MAGIC);
+	sb->state = to_le16(EXT4_SUPERBLOCK_STATE_VALID_FS);
+	sb->errors = to_le16(EXT4_SUPERBLOCK_ERRORS_RO);
+	sb->minor_rev_level = to_le16(0);
+	sb->last_check_time = to_le32(0);
+	sb->check_interval = to_le32(0);
+	sb->creator_os = to_le32(EXT4_SUPERBLOCK_OS_LINUX);
+	sb->rev_level = to_le32(1);
+	sb->def_resuid = to_le16(0);
+	sb->def_resgid = to_le16(0);
+
+	sb->first_inode = to_le32(EXT4_GOOD_OLD_FIRST_INO);
+	sb->inode_size = to_le16(info->inode_size);
+	sb->block_group_index = to_le16(0);	/* write_sblocks() rewrites this for each backup copy */
+
+	sb->features_compatible = to_le32(info->feat_compat);
+	sb->features_incompatible = to_le32(info->feat_incompat);
+	sb->features_read_only = to_le32(info->feat_ro_compat);
+
+	memcpy(sb->uuid, info->uuid, UUID_SIZE);
+
+	memset(sb->volume_name, 0, sizeof(sb->volume_name));	/* cleared first; strncpy below may fill it without a NUL */
+	strncpy(sb->volume_name, info->label, sizeof(sb->volume_name));
+	memset(sb->last_mounted, 0, sizeof(sb->last_mounted));
+
+	sb->algorithm_usage_bitmap = to_le32(0);
+	sb->s_prealloc_blocks = 0;
+	sb->s_prealloc_dir_blocks = 0;
+	sb->s_reserved_gdt_blocks = to_le16(info->bg_desc_reserve_blocks);
+
+	if (info->feat_compat & EXT4_FCOM_HAS_JOURNAL)
+		sb->journal_inode_number = to_le32(EXT4_JOURNAL_INO);
+
+	sb->journal_backup_type = 1;
+	sb->journal_dev = to_le32(0);
+	sb->last_orphan = to_le32(0);
+	sb->hash_seed[0] = to_le32(0x11111111);	/* fixed seed: the same four words for every volume */
+	sb->hash_seed[1] = to_le32(0x22222222);
+	sb->hash_seed[2] = to_le32(0x33333333);
+	sb->hash_seed[3] = to_le32(0x44444444);
+	sb->default_hash_version = EXT2_HTREE_HALF_MD4;
+	sb->checksum_type = 1;
+	sb->desc_size = to_le16(info->dsc_size);
+	sb->default_mount_opts = to_le32(0);
+	sb->first_meta_bg = to_le32(0);
+	sb->mkfs_time = to_le32(0);
+
+	sb->reserved_blocks_count_hi = to_le32(0);
+	/* NOTE(review): to_le32 on the two extra_isize fields - confirm they are 32-bit in struct ext4_sblock */
+	sb->min_extra_isize = to_le32(sizeof(struct ext4_inode) -
+		EXT4_GOOD_OLD_INODE_SIZE);
+	sb->want_extra_isize = to_le32(sizeof(struct ext4_inode) -
+		EXT4_GOOD_OLD_INODE_SIZE);
+	sb->flags = to_le32(EXT4_SUPERBLOCK_FLAGS_SIGNED_HASH);
+}
+
+
+/* Store aux_info->bg_desc_blk as descriptor block number blk of every block
+ * group.  Groups that carry a superblock copy keep it in their first block,
+ * so their descriptors start one block later.  Returns 0 or the first error. */
+static int write_bgroup_block(struct ext4_blockdev *bd,
+			      struct fs_aux_info *aux_info,
+			      struct ext4_mkfs_info *info,
+			      u32int blk)
+{
+	int r = 0;
+	u32int j;
+	struct ext4_block b;
+
+	u32int block_size = ext4_sb_get_block_size(aux_info->sb);
+
+	for (j = 0; j < aux_info->groups; j++) {
+		/* widen before multiplying so the block number cannot wrap */
+		u64int bg_start_block = aux_info->first_data_block +
+					  (u64int)j * info->blocks_per_group;
+
+		if (has_superblock(info, j))
+			bg_start_block++;
+
+		u64int dsc_blk = bg_start_block + blk;
+
+		/* the whole block is overwritten below, so it is not read first */
+		r = ext4_block_get_noread(bd, &b, dsc_blk);
+		if (r != 0)
+			return r;
+
+		memcpy(b.data, aux_info->bg_desc_blk, block_size);
+
+		ext4_bcache_set_dirty(b.buf);
+		r = ext4_block_set(bd, &b);
+		if (r != 0)
+			return r;
+	}
+
+	return r;
+}
+/* Fill a descriptor for every block group, zero its two bitmap blocks and write the descriptor blocks out. */
+static int write_bgroups(struct ext4_blockdev *bd, struct fs_aux_info *aux_info,
+			 struct ext4_mkfs_info *info)
+{
+	int r;
+
+	struct ext4_block b;
+	struct ext4_bgroup *bg_desc;
+
+	u32int i;
+	u32int bg_free_blk;
+	u64int sb_free_blk = 0;	/* sum of the per-group free counts, stored in the superblock at the end */
+	u32int block_size = ext4_sb_get_block_size(aux_info->sb);
+	u32int dsc_size = ext4_sb_get_desc_size(aux_info->sb);
+	u32int dsc_per_block = block_size / dsc_size;
+	u32int k = 0;	/* descriptors filled so far in the current descriptor block */
+
+	for (i = 0; i < aux_info->groups; i++) {
+		/* NOTE(review): first_data_block is added twice here, unlike write_bgroup_block() - confirm */
+		u64int bg_start_block = aux_info->first_data_block +
+			aux_info->first_data_block + i * info->blocks_per_group;
+		u32int blk_off = 0;
+
+		bg_desc = (void *)(aux_info->bg_desc_blk + k * dsc_size);
+		bg_free_blk = info->blocks_per_group -
+				aux_info->inode_table_blocks;
+
+		bg_free_blk -= 2;	/* presumably the block bitmap and the inode bitmap */
+		blk_off += aux_info->bg_desc_blocks;
+
+		if (i == (aux_info->groups - 1))
+			bg_free_blk -= aux_info->first_data_block;
+
+		if (has_superblock(info, i)) {
+			bg_start_block++;
+			blk_off += info->bg_desc_reserve_blocks;
+			bg_free_blk -= info->bg_desc_reserve_blocks + 1;
+			bg_free_blk -= aux_info->bg_desc_blocks;
+		}
+
+		ext4_bg_set_block_bitmap(bg_desc, aux_info->sb,
+					 bg_start_block + blk_off + 1);
+
+		ext4_bg_set_inode_bitmap(bg_desc, aux_info->sb,
+					 bg_start_block + blk_off + 2);
+
+		ext4_bg_set_inode_table_first_block(bg_desc,
+						aux_info->sb,
+						bg_start_block + blk_off + 3);
+
+		ext4_bg_set_free_blocks_count(bg_desc, aux_info->sb,
+					      bg_free_blk);
+
+		ext4_bg_set_free_inodes_count(bg_desc,
+				aux_info->sb, to_le32(aux_info->sb->inodes_per_group));
+
+		ext4_bg_set_used_dirs_count(bg_desc, aux_info->sb, 0);
+
+		ext4_bg_set_flag(bg_desc,
+				 EXT4_BLOCK_GROUP_BLOCK_UNINIT |
+				 EXT4_BLOCK_GROUP_INODE_UNINIT);
+
+		sb_free_blk += bg_free_blk;
+
+		/* zero the block bitmap block ... */
+		r = ext4_block_get_noread(bd, &b, bg_start_block + blk_off + 1);
+		if (r != 0)
+			return r;
+		memset(b.data, 0, block_size);
+		ext4_bcache_set_dirty(b.buf);
+		r = ext4_block_set(bd, &b);
+		if (r != 0)
+			return r;
+		r = ext4_block_get_noread(bd, &b, bg_start_block + blk_off + 2);	/* ... and the inode bitmap block */
+		if (r != 0)
+			return r;
+		memset(b.data, 0, block_size);
+		ext4_bcache_set_dirty(b.buf);
+		r = ext4_block_set(bd, &b);
+		if (r != 0)
+			return r;
+
+		if (++k != dsc_per_block)
+			continue;	/* the descriptor block is not full yet */
+
+		k = 0;
+		r = write_bgroup_block(bd, aux_info, info, i / dsc_per_block);
+		if (r != 0)
+			return r;
+
+	}
+
+	/* write the last descriptor block; this happens even when k is 0 */
+	r = write_bgroup_block(bd, aux_info, info, i / dsc_per_block);
+	if (r != 0)
+		return r;
+
+	ext4_sb_set_free_blocks_cnt(aux_info->sb, sb_free_blk);
+	return r;
+}
+
+/* Write aux_info->sb to every group that carries a backup copy, then to the
+ * primary location.  Returns 0 or the first write error. */
+int write_sblocks(struct ext4_blockdev *bd, struct fs_aux_info *aux_info,
+			  struct ext4_mkfs_info *info)
+{
+	u64int offset;
+	u32int i;
+	int r;
+
+	/* write out the backup superblocks */
+	for (i = 1; i < aux_info->groups; i++) {
+		if (!has_superblock(info, i))
+			continue;
+		/* byte offsets pass 4 GiB quickly: do the arithmetic in 64 bits */
+		offset = (u64int)info->block_size * (aux_info->first_data_block
+			+ (u64int)i * info->blocks_per_group);
+		aux_info->sb->block_group_index = to_le16(i);
+		r = ext4_block_writebytes(bd, offset, aux_info->sb, EXT4_SUPERBLOCK_SIZE);
+		if (r != 0)
+			return r;
+	}
+	/* write out the primary superblock */
+	aux_info->sb->block_group_index = to_le16(0);
+	return ext4_block_writebytes(bd, EXT4_SUPERBLOCK_OFFSET, aux_info->sb,
+			EXT4_SUPERBLOCK_SIZE);
+}
+
+
+/* Read the superblock of bd and translate it into info.  Returns 0 on success,
+ * -1 (errstr Enomem) if the buffer cannot be allocated, else the failing call's error. */
+int ext4_mkfs_read_info(struct ext4_blockdev *bd, struct ext4_mkfs_info *info)
+{
+	int r;
+	struct ext4_sblock *sb;
+	r = ext4_block_init(bd);
+	if (r != 0)
+		return r;
+
+	sb = ext4_malloc(EXT4_SUPERBLOCK_SIZE);
+	if (!sb) {
+		werrstr(Enomem);
+		r = -1;
+		goto Finish;
+	}
+	r = ext4_sb_read(bd, sb);
+	if (r == 0)
+		r = sb2info(sb, info);
+Finish:
+	if (sb)
+		ext4_free(sb);
+	ext4_block_fini(bd);
+	return r;
+}
+
+/* Compute the layout for info, fill the in-memory superblock and write the
+ * group descriptors followed by the superblocks.  The auxiliary buffers are
+ * released before returning, whatever the outcome.
+ * Returns 0 or the error of the first failing step. */
+static int mkfs_init(struct ext4_blockdev *bd, struct ext4_mkfs_info *info)
+{
+	struct fs_aux_info aux;
+	int rc;
+
+	memset(&aux, 0, sizeof(struct fs_aux_info));
+
+	rc = create_fs_aux_info(&aux, info);
+	if (rc == 0) {
+		fill_sb(&aux, info);
+		rc = write_bgroups(bd, &aux, info);
+	}
+
+	if (rc == 0)
+		rc = write_sblocks(bd, &aux, info);
+
+	release_fs_aux_info(&aux);
+
+	return rc;
+}
+
+/* Take and immediately drop a reference to each block group in turn.
+ * Returns 0, or the first error from getting or putting a reference. */
+static int init_bgs(struct ext4_fs *fs)
+{
+	struct ext4_block_group_ref bg;
+	u32int total = ext4_block_group_cnt(&fs->sb);
+	u32int id;
+	int rc = 0;
+
+	for (id = 0; rc == 0 && id < total; ++id) {
+		rc = ext4_fs_get_block_group_ref(fs, id, &bg);
+		if (rc == 0)
+			rc = ext4_fs_put_block_group_ref(&bg);
+	}
+
+	return rc;
+}
+
+/* Perform eleven inode allocations (loop counter 1..11) and zero each mode.
+ * The EXT4_ROOT_INO and EXT4_GOOD_OLD_FIRST_INO iterations ask for a
+ * directory, all others for a regular file; the EXT4_ROOT_INO and
+ * EXT4_JOURNAL_INO iterations also call ext4_fs_inode_blocks_init().
+ * NOTE(review): presumably the n-th allocation yields inode number n -
+ * confirm against ext4_fs_alloc_inode().
+ * Returns 0 or the error of the first failing allocation. */
+static int alloc_inodes(struct ext4_fs *fs)
+{
+	struct ext4_inode_ref ref;
+	int rc = 0;
+	int ino;
+
+	for (ino = 1; ino < 12; ++ino) {
+		int filetype;
+
+		if (ino == EXT4_ROOT_INO || ino == EXT4_GOOD_OLD_FIRST_INO)
+			filetype = EXT4_DE_DIR;
+		else
+			filetype = EXT4_DE_REG_FILE;
+
+		rc = ext4_fs_alloc_inode(fs, &ref, filetype);
+		if (rc != 0)
+			return rc;
+
+		ext4_inode_set_mode(&fs->sb, ref.inode, 0);
+
+		if (ino == EXT4_ROOT_INO || ino == EXT4_JOURNAL_INO)
+			ext4_fs_inode_blocks_init(fs, &ref);
+
+		ext4_fs_put_inode_ref(&ref);
+	}
+
+	return rc;
+}
+
+/* Create the root directory and lost+found.  Both inode references are released
+ * on every path, error paths included.  Returns 0 or the first error. */
+static int create_dirs(struct ext4_fs *fs)
+{
+	int r;
+	struct ext4_inode_ref root;
+	struct ext4_inode_ref child;
+
+	r = ext4_fs_get_inode_ref(fs, EXT4_ROOT_INO, &root);
+	if (r != 0)
+		return r;
+
+	r = ext4_fs_get_inode_ref(fs, EXT4_GOOD_OLD_FIRST_INO, &child);
+	if (r != 0)
+		goto Put_root;
+
+	ext4_inode_set_mode(&fs->sb, child.inode, EXT4_INODE_MODE_DIRECTORY | 0777);
+	ext4_inode_set_mode(&fs->sb, root.inode, EXT4_INODE_MODE_DIRECTORY | 0777);
+
+	/* Initialize directory index if supported */
+	if (ext4_sb_feature_com(&fs->sb, EXT4_FCOM_DIR_INDEX)) {
+		r = ext4_dir_dx_init(&root, &root);
+		if (r != 0)
+			goto Put_child;
+
+		r = ext4_dir_dx_init(&child, &root);
+		if (r != 0)
+			goto Put_child;
+
+		ext4_inode_set_flag(root.inode,	EXT4_INODE_FLAG_INDEX);
+		ext4_inode_set_flag(child.inode, EXT4_INODE_FLAG_INDEX);
+	} else {
+		r = ext4_dir_add_entry(&root, ".", strlen("."), &root);
+		if (r != 0)
+			goto Put_child;
+
+		r = ext4_dir_add_entry(&root, "..", strlen(".."), &root);
+		if (r != 0)
+			goto Put_child;
+
+		r = ext4_dir_add_entry(&child, ".", strlen("."), &child);
+		if (r != 0)
+			goto Put_child;
+
+		r = ext4_dir_add_entry(&child, "..", strlen(".."), &root);
+		if (r != 0)
+			goto Put_child;
+	}
+
+	r = ext4_dir_add_entry(&root, "lost+found", strlen("lost+found"), &child);
+	if (r != 0)
+		goto Put_child;
+
+	ext4_inode_set_links_cnt(root.inode, 3);
+	ext4_inode_set_links_cnt(child.inode, 2);
+	child.dirty = true;
+	root.dirty = true;
+Put_child:
+	ext4_fs_put_inode_ref(&child);
+Put_root:
+	ext4_fs_put_inode_ref(&root);
+	return r;
+}
+/* When info->journal is set, grow EXT4_JOURNAL_INO block by block and write a JBD superblock into its block 0. */
+static int create_journal_inode(struct ext4_fs *fs,
+				struct ext4_mkfs_info *info)
+{
+	int ret;
+	struct ext4_inode_ref inode_ref;
+	u64int blocks_count;
+
+	if (!info->journal)
+		return 0;	/* no journal requested: nothing to do */
+
+	ret = ext4_fs_get_inode_ref(fs, EXT4_JOURNAL_INO, &inode_ref);
+	if (ret != 0)
+		return ret;
+
+	struct ext4_inode *inode = inode_ref.inode;
+
+	ext4_inode_set_mode(&fs->sb, inode, EXT4_INODE_MODE_FILE | 0600);
+	ext4_inode_set_links_cnt(inode, 1);
+
+	blocks_count = ext4_inode_get_blocks_count(&fs->sb, inode);
+
+	while (blocks_count++ < info->journal_blocks)	/* one appended block per iteration */
+	{
+		ext4_fsblk_t fblock;
+		ext4_lblk_t iblock;
+		struct ext4_block blk;
+
+		ret = ext4_fs_append_inode_dblk(&inode_ref, &fblock, &iblock);
+		if (ret != 0)
+			goto Finish;
+
+		if (iblock != 0)
+			continue;	/* only logical block 0 receives the journal superblock */
+
+		ret = ext4_block_get(fs->bdev, &blk, fblock);
+		if (ret != 0)
+			goto Finish;
+
+
+		struct jbd_sb * jbd_sb = (struct jbd_sb * )blk.data;
+		memset(jbd_sb, 0, sizeof(struct jbd_sb));
+
+		/* journal superblock fields are stored big-endian */
+		jbd_sb->header.magic = to_be32(JBD_MAGIC_NUMBER);
+		jbd_sb->header.blocktype = to_be32(JBD_SUPERBLOCK_V2);
+		jbd_sb->blocksize = to_be32(info->block_size);
+		jbd_sb->maxlen = to_be32(info->journal_blocks);
+		jbd_sb->nr_users = to_be32(1);
+		jbd_sb->first = to_be32(1);
+		jbd_sb->sequence = to_be32(1);
+
+		ext4_bcache_set_dirty(blk.buf);
+		ret = ext4_block_set(fs->bdev, &blk);
+		if (ret != 0)
+			goto Finish;
+	}
+
+	memcpy(fs->sb.journal_blocks, inode->blocks, sizeof(inode->blocks));	/* copy the inode's blocks[] into the superblock */
+
+	Finish:
+	ext4_fs_put_inode_ref(&inode_ref);	/* released on success and on error */
+
+	return ret;
+}
+/* Create a file system on bd: zero-valued fields of info get defaults, fs_type 2, 3 or 4 selects the feature sets. */
+int ext4_mkfs(struct ext4_fs *fs, struct ext4_blockdev *bd,
+	      struct ext4_mkfs_info *info, int fs_type)
+{
+	int r;
+
+	r = ext4_block_init(bd);
+	if (r != 0)
+		return r;
+
+	bd->fs = fs;
+
+	if (info->len == 0)
+		info->len = bd->part_size;	/* default: use the whole partition */
+
+	if (info->block_size == 0)
+		info->block_size = 4096; /*Set block size to default value*/
+
+	/* Round down the filesystem length to be a multiple of the block size */
+	info->len &= ~((u64int)info->block_size - 1);
+
+	if (info->journal_blocks == 0)
+		info->journal_blocks = compute_journal_blocks(info);
+
+	if (info->blocks_per_group == 0)
+		info->blocks_per_group = compute_blocks_per_group(info);
+
+	if (info->inodes == 0)
+		info->inodes = compute_inodes(info);
+
+	if (info->inode_size == 0)
+		info->inode_size = 256;
+
+	info->inodes_per_group = compute_inodes_per_group(info);	/* also rewrites info->inodes */
+
+	switch (fs_type) {	/* any other value leaves the feat_* fields of info as passed in */
+	case 2:
+		info->feat_compat = EXT2_SUPPORTED_FCOM;
+		info->feat_ro_compat = EXT2_SUPPORTED_FRO_COM;
+		info->feat_incompat = EXT2_SUPPORTED_FINCOM;
+		break;
+	case 3:
+		info->feat_compat = EXT3_SUPPORTED_FCOM;
+		info->feat_ro_compat = EXT3_SUPPORTED_FRO_COM;
+		info->feat_incompat = EXT3_SUPPORTED_FINCOM;
+		break;
+	case 4:
+		info->feat_compat = EXT4_SUPPORTED_FCOM;
+		info->feat_ro_compat = EXT4_SUPPORTED_FRO_COM;
+		info->feat_incompat = EXT4_SUPPORTED_FINCOM;
+		break;
+	}
+
+	/*TODO: handle these features some day...*/
+	info->feat_incompat &= ~EXT4_FINCOM_META_BG;
+	info->feat_incompat &= ~EXT4_FINCOM_FLEX_BG;
+	info->feat_incompat &= ~EXT4_FINCOM_64BIT;
+
+	info->feat_ro_compat &= ~EXT4_FRO_COM_METADATA_CSUM;
+	info->feat_ro_compat &= ~EXT4_FRO_COM_GDT_CSUM;
+	info->feat_ro_compat &= ~EXT4_FRO_COM_DIR_NLINK;
+	info->feat_ro_compat &= ~EXT4_FRO_COM_EXTRA_ISIZE;
+	info->feat_ro_compat &= ~EXT4_FRO_COM_HUGE_FILE;
+
+	if (info->journal)
+		info->feat_compat |= EXT4_FCOM_HAS_JOURNAL;
+
+	if (info->dsc_size == 0) {
+		/* the 64BIT bit was cleared above, so the first branch is never taken */
+		if (info->feat_incompat & EXT4_FINCOM_64BIT)
+			info->dsc_size = EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE;
+		else
+			info->dsc_size = EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE;
+	}
+
+	info->bg_desc_reserve_blocks = 0;
+
+	ext4_dbg(DEBUG_MKFS, DBG_INFO "Creating filesystem with parameters:\n");
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Size: %llud\n", info->len);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Block size: %ud\n",
+			info->block_size);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Blocks per group: %ud\n",
+			info->blocks_per_group);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Inodes per group: %ud\n",
+			info->inodes_per_group);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Inode size: %ud\n",
+			info->inode_size);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Inodes: %ud\n", info->inodes);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Journal blocks: %ud\n",
+			info->journal_blocks);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Features ro_compat: 0x%x\n",
+			info->feat_ro_compat);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Features compat: 0x%x\n",
+			info->feat_compat);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Features incompat: 0x%x\n",
+			info->feat_incompat);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "BG desc reserve: %ud\n",
+			info->bg_desc_reserve_blocks);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Descriptor size: %uhd\n",
+			info->dsc_size);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "journal: %s\n",
+			info->journal ? "yes" : "no");
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "Label: %s\n", info->label);
+
+	struct ext4_bcache bc;	/* set up and torn down within this call */
+
+	memset(&bc, 0, sizeof(struct ext4_bcache));
+	ext4_block_set_lb_size(bd, info->block_size);
+
+	r = ext4_bcache_init_dynamic(&bc, CONFIG_BLOCK_DEV_CACHE_SIZE,
+				      info->block_size);
+	if (r != 0)
+		goto block_fini;
+
+	/*Bind block cache to block device*/
+	r = ext4_block_bind_bcache(bd, &bc);
+	if (r != 0)
+		goto cache_fini;
+
+	r = ext4_block_cache_write_back(bd, 1);
+	if (r != 0)
+		goto cache_fini;
+
+	r = mkfs_init(bd, info);
+	if (r != 0)
+		goto cache_fini;
+
+	r = ext4_fs_init(fs, bd, false);
+	if (r != 0)
+		goto cache_fini;
+
+	r = init_bgs(fs);
+	if (r != 0)
+		goto fs_fini;
+
+	r = alloc_inodes(fs);
+	if (r != 0)
+		goto fs_fini;
+
+	r = create_dirs(fs);
+	if (r != 0)
+		goto fs_fini;
+
+	r = create_journal_inode(fs, info);
+	if (r != 0)
+		goto fs_fini;
+
+	fs_fini:	/* the labels below are also reached on success, in this order */
+	ext4_fs_fini(fs);
+
+	cache_fini:
+	ext4_block_cache_write_back(bd, 0);
+	ext4_bcache_fini_dynamic(&bc);
+
+	block_fini:
+	ext4_block_fini(bd);
+
+	return r;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_super.c
@@ -1,0 +1,221 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_super.h"
+#include "ext4_crc32.h"
+
+/*
+ * Number of block groups described by superblock s: the total block
+ * count divided by blocks-per-group, rounded up so that a trailing
+ * partial group is counted too.
+ */
+u32int ext4_block_group_cnt(struct ext4_sblock *s)
+{
+	u64int total = ext4_sb_get_blocks_cnt(s);
+	u32int per_group = ext4_get32(s, blocks_per_group);
+	u32int groups = (u32int)(total / per_group);
+
+	/* a remainder means one more, partially filled, group */
+	return (total % per_group) ? groups + 1 : groups;
+}
+
+/*
+ * Number of blocks in block group bgid. Every group but the last one
+ * holds blocks_per_group blocks; the last one gets whatever is left of
+ * the total block count.
+ */
+u32int ext4_blocks_in_group_cnt(struct ext4_sblock *s, u32int bgid)
+{
+	u32int block_group_count = ext4_block_group_cnt(s);
+	u32int blocks_per_group = ext4_get32(s, blocks_per_group);
+	u64int total_blocks = ext4_sb_get_blocks_cnt(s);
+
+	if (bgid < block_group_count - 1)
+		return blocks_per_group;
+
+	/*
+	 * Widen before multiplying: the product of two 32-bit values can
+	 * exceed 32 bits on large volumes and would otherwise wrap before
+	 * being subtracted from the 64-bit total.
+	 */
+	return (u32int)(total_blocks -
+	    (u64int)(block_group_count - 1) * blocks_per_group);
+}
+
+/*
+ * Number of inodes in block group bgid: inodes_per_group for every
+ * group except the last, which holds the remainder of inodes_count.
+ */
+u32int ext4_inodes_in_group_cnt(struct ext4_sblock *s, u32int bgid)
+{
+	u32int last = ext4_block_group_cnt(s) - 1;
+	u32int per_group = ext4_get32(s, inodes_per_group);
+	u32int total = ext4_get32(s, inodes_count);
+
+	if (bgid >= last)
+		return total - last * per_group;
+
+	return per_group;
+}
+
+/*
+ * crc32c over the superblock bytes that precede the checksum field,
+ * seeded with EXT4_CRC32_INIT.
+ */
+static u32int ext4_sb_csum(struct ext4_sblock *s)
+{
+	u32int crc;
+
+	crc = ext4_crc32c(EXT4_CRC32_INIT, s,
+			  offsetof(struct ext4_sblock, checksum));
+	return crc;
+}
+
+/*
+ * Check the stored superblock checksum. Superblocks without the
+ * METADATA_CSUM feature always pass; otherwise the checksum type must
+ * be crc32c and the stored value must match the computed one.
+ */
+static bool ext4_sb_verify_csum(struct ext4_sblock *s)
+{
+	if (ext4_sb_feature_ro_com(s, EXT4_FRO_COM_METADATA_CSUM)) {
+		if (s->checksum_type != to_le32(EXT4_CHECKSUM_CRC32C))
+			return false;
+
+		return s->checksum == to_le32(ext4_sb_csum(s));
+	}
+
+	return true;
+}
+
+/*
+ * Recompute and store the superblock checksum; does nothing when the
+ * METADATA_CSUM feature is off.
+ */
+void ext4_sb_set_csum(struct ext4_sblock *s)
+{
+	if (ext4_sb_feature_ro_com(s, EXT4_FRO_COM_METADATA_CSUM))
+		s->checksum = to_le32(ext4_sb_csum(s));
+}
+
+/*
+ * Refresh the checksum of s and write the EXT4_SUPERBLOCK_SIZE bytes
+ * of it to bdev at EXT4_SUPERBLOCK_OFFSET. Returns the result of
+ * ext4_block_writebytes.
+ */
+int ext4_sb_write(struct ext4_blockdev *bdev, struct ext4_sblock *s)
+{
+	int r;
+
+	ext4_sb_set_csum(s);
+	r = ext4_block_writebytes(bdev, EXT4_SUPERBLOCK_OFFSET, s,
+				  EXT4_SUPERBLOCK_SIZE);
+	return r;
+}
+
+/*
+ * Read EXT4_SUPERBLOCK_SIZE bytes from bdev at EXT4_SUPERBLOCK_OFFSET
+ * into s. Returns the result of ext4_block_readbytes; the contents are
+ * not validated here (see ext4_sb_check).
+ */
+int ext4_sb_read(struct ext4_blockdev *bdev, struct ext4_sblock *s)
+{
+	int r;
+
+	r = ext4_block_readbytes(bdev, EXT4_SUPERBLOCK_OFFSET, s,
+				 EXT4_SUPERBLOCK_SIZE);
+	return r;
+}
+
+/*
+ * Sanity-check an in-memory superblock: magic number, non-zero
+ * geometry counters, minimum inode size and first inode number,
+ * descriptor size within bounds and, last, the checksum.
+ * Returns true only if every check passes.
+ */
+bool ext4_sb_check(struct ext4_sblock *s)
+{
+	u32int dsc;
+
+	if (ext4_get16(s, magic) != EXT4_SUPERBLOCK_MAGIC)
+		return false;
+
+	/* none of the geometry counters may be zero */
+	if (ext4_get32(s, inodes_count) == 0 ||
+	    ext4_sb_get_blocks_cnt(s) == 0 ||
+	    ext4_get32(s, blocks_per_group) == 0 ||
+	    ext4_get32(s, inodes_per_group) == 0)
+		return false;
+
+	if (ext4_get16(s, inode_size) < 128 ||
+	    ext4_get32(s, first_inode) < 11)
+		return false;
+
+	dsc = ext4_sb_get_desc_size(s);
+	if (dsc < EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE ||
+	    dsc > EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return false;
+
+	return ext4_sb_verify_csum(s);
+}
+
+/*
+ * Returns 1 if a equals b raised to some power >= 1, 0 otherwise.
+ * Factors of b are stripped from a until it is no longer above b.
+ */
+static inline int is_power_of(u32int a, u32int b)
+{
+	while (a > b) {
+		if ((a % b) != 0)
+			return 0;
+		a /= b;
+	}
+
+	return a == b;
+}
+
+/*
+ * True for the groups selected by the sparse layout: groups 0 and 1
+ * and every odd group that is a power of 3, 5 or 7.
+ */
+bool ext4_sb_sparse(u32int group)
+{
+	static const u32int bases[] = { 7, 5, 3 };
+	int i;
+
+	if (group <= 1)
+		return true;
+
+	/* powers of odd numbers are odd, so even groups never qualify */
+	if ((group & 1) == 0)
+		return false;
+
+	for (i = 0; i < 3; i++) {
+		if (is_power_of(group, bases[i]))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Does block group `group` carry a superblock copy? Without the
+ * SPARSE_SUPER feature every group does; with it only the groups
+ * accepted by ext4_sb_sparse do.
+ */
+bool ext4_sb_is_super_in_bg(struct ext4_sblock *s, u32int group)
+{
+	if (!ext4_sb_feature_ro_com(s, EXT4_FRO_COM_SPARSE_SUPER))
+		return true;
+
+	return ext4_sb_sparse(group);
+}
+
+/*
+ * Group descriptor blocks held by `group` under the meta_bg layout:
+ * 1 for the first, second and last group of its meta group, else 0.
+ */
+static u32int ext4_bg_num_gdb_meta(struct ext4_sblock *s, u32int group)
+{
+	u32int per_block =
+	    ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+	u32int first = (group / per_block) * per_block;
+	u32int last = first + per_block - 1;
+
+	return (group == first || group == first + 1 || group == last) ? 1 : 0;
+}
+
+/*
+ * Group descriptor blocks held by `group` under the classic layout:
+ * 0 when the group has no superblock copy; otherwise first_meta_bg
+ * when META_BG is enabled, or else the number of blocks needed to
+ * describe every block group.
+ */
+static u32int ext4_bg_num_gdb_nometa(struct ext4_sblock *s, u32int group)
+{
+	u32int per_block, blocks;
+
+	if (!ext4_sb_is_super_in_bg(s, group))
+		return 0;
+
+	per_block = ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+	blocks = (ext4_block_group_cnt(s) + per_block - 1) / per_block;
+
+	return ext4_sb_feature_incom(s, EXT4_FINCOM_META_BG) ?
+	    ext4_sb_first_meta_bg(s) : blocks;
+}
+
+/*
+ * Number of group descriptor blocks stored in `group`. Groups whose
+ * meta group index is at or past first_meta_bg on a META_BG file
+ * system use the meta_bg rule; all others use the classic one.
+ */
+u32int ext4_bg_num_gdb(struct ext4_sblock *s, u32int group)
+{
+	u32int per_block =
+	    ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+	u32int first_meta = ext4_sb_first_meta_bg(s);
+	u32int meta_idx = group / per_block;
+
+	if (ext4_sb_feature_incom(s, EXT4_FINCOM_META_BG) &&
+	    meta_idx >= first_meta)
+		return ext4_bg_num_gdb_meta(s, group);
+
+	return ext4_bg_num_gdb_nometa(s, group);
+}
+
+/*
+ * Number of clusters at the start of block_group taken by base
+ * metadata: the superblock copy (if any), the group descriptor blocks
+ * and, outside the meta_bg area, the reserved GDT blocks.
+ */
+u32int ext4_num_base_meta_clusters(struct ext4_sblock *s,
+				     u32int block_group)
+{
+	u32int num;
+	u32int dsc_per_block =
+	    ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+
+	num = ext4_sb_is_super_in_bg(s, block_group);
+
+	if (!ext4_sb_feature_incom(s, EXT4_FINCOM_META_BG) ||
+	    block_group < ext4_sb_first_meta_bg(s) * dsc_per_block) {
+		if (num) {
+			num += ext4_bg_num_gdb(s, block_group);
+			num += ext4_get16(s, s_reserved_gdt_blocks);
+		}
+	} else {
+		num += ext4_bg_num_gdb(s, block_group);
+	}
+
+	/*
+	 * Convert blocks to clusters, rounding up. log_cluster_size is
+	 * relative to 1024 bytes, not to the block size, so the block
+	 * count has to be divided by the blocks-per-cluster ratio; shifting
+	 * by log_cluster_size itself is only right for 1 KiB blocks.
+	 */
+	u32int clustersize = 1024 << ext4_get32(s, log_cluster_size);
+	u32int cluster_ratio = clustersize / ext4_sb_get_block_size(s);
+
+	/* a cluster is never smaller than a block */
+	if (cluster_ratio == 0)
+		cluster_ratio = 1;
+
+	return (num + cluster_ratio - 1) / cluster_ratio;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_trans.c
@@ -1,0 +1,61 @@
+#include "ext4_config.h"
+#include "ext4.h"
+#include "ext4_fs.h"
+#include "ext4_journal.h"
+
+/*
+ * Mark buf dirty. While a journal is attached and a transaction is
+ * current the block is handed to that transaction; otherwise it is
+ * simply flagged dirty in the block cache and 0 is returned.
+ */
+int ext4_trans_set_block_dirty(struct ext4_buf *buf)
+{
+	struct ext4_fs *fs = buf->bc->bdev->fs;
+	struct ext4_block block = {
+		.lb_id = buf->lba,
+		.data = buf->data,
+		.buf = buf
+	};
+
+	if (!fs->jbd_journal || !fs->curr_trans) {
+		ext4_bcache_set_dirty(buf);
+		return 0;
+	}
+
+	return jbd_trans_set_block_dirty(fs->curr_trans, &block);
+}
+
+/*
+ * Transaction-layer wrapper: forwards straight to
+ * ext4_block_get_noread and returns its result.
+ */
+int ext4_trans_block_get_noread(struct ext4_blockdev *bdev,
+			  struct ext4_block *b,
+			  u64int lba)
+{
+	return ext4_block_get_noread(bdev, b, lba);
+}
+
+/*
+ * Transaction-layer wrapper: forwards straight to ext4_block_get and
+ * returns its result.
+ */
+int ext4_trans_block_get(struct ext4_blockdev *bdev,
+		   struct ext4_block *b,
+		   u64int lba)
+{
+	return ext4_block_get(bdev, b, lba);
+}
+
+/*
+ * Revoke block lba. Without a journal this is a no-op returning 0.
+ * With a journal the block is revoked in the current transaction, or,
+ * when no transaction is current, flushed to the device instead.
+ */
+int ext4_trans_try_revoke_block(struct ext4_blockdev *bdev, u64int lba)
+{
+	struct ext4_fs *fs = bdev->fs;
+
+	if (!fs->jbd_journal)
+		return 0;
+
+	if (fs->curr_trans)
+		return jbd_trans_try_revoke_block(fs->curr_trans, lba);
+
+	return ext4_block_flush_lba(fs->bdev, lba);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4srv.c
@@ -1,0 +1,1050 @@
+#include "ext4_config.h"
+#include "ext4.h"
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+#include <bio.h>
+#include "ext4_inode.h"
+#include "group.h"
+#include "common.h"
+
+/* smaller of a and b; both arguments are evaluated twice */
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+/* stack size passed to proccreate for the command proc (see rstart) */
+int mainstacksize = 65536;
+
+typedef struct Aux Aux;
+
+/* per-fid state, hung off Fid.aux */
+struct Aux {
+	Part *p;	/* partition returned by openpart; reference counted */
+	u32int uid;	/* numeric id of the attached user, or Root */
+	char *path;	/* malloc'd path below the mount point; "" is the root */
+	int doff;	/* dirgen cursor: number of visible entries already passed */
+	/* open handle, nil until opened; which member is valid follows type */
+	union {
+		ext4_file *file;
+		ext4_dir *dir;
+	};
+	int type;	/* Adir or Afile */
+};
+
+/* values of Aux.type */
+enum {
+	Adir,	/* fid refers to a directory */
+	Afile,	/* fid refers to anything else */
+};
+
+/* defaults; threadmain overrides them from the command line */
+static Opts opts = {
+	.group = nil,		/* -g: contents of the group file */
+	.cachewb = 0,		/* -C */
+	.asroot = 0,		/* -R: every attach gets uid Root */
+	.rdonly = 0,		/* -r */
+	.linkmode = Lhide,	/* -l switches to Lresolve */
+
+	/* mkfs parameters; fstype is -1 until decided, 0 when serving, 2..4 for -M */
+	.fstype = -1,
+	.blksz = 1024,		/* -b */
+	.label = "",		/* -L */
+	.inodesz = 256,		/* -I */
+	.ninode = 0,		/* -N */
+};
+static u32int Root;		/* uid that passes every ownership check; set by -R */
+static u8int zero[65536];	/* zero filler used by rwrite to pad files */
+static char *srvname = "ext4";	/* name used for the /srv entries */
+
+/*
+ * Resolve the symbolic link at s, a malloc'd path that starts with the
+ * mount point of a->p. Ownership of s passes to this function.
+ *
+ * With opts.linkmode == Lresolve and s naming a link, a new malloc'd
+ * path is returned: the target appended to the mount point if it is
+ * absolute, or to the directory holding the link otherwise. If s is not
+ * a readable link (or links are not being resolved) s itself is
+ * returned. If value is not nil it receives a malloc'd copy of the raw
+ * link target, or nil when nothing was resolved.
+ *
+ * Returns nil with errstr set on failure; s has been freed by then and
+ * must not be freed again by the caller.
+ */
+static char *
+linkresolve(Aux *a, char *s, char **value)
+{
+	char *q, *t, buf[4096+1];
+	usize sz;
+	int res;
+
+	if(value != nil)
+		*value = nil;
+	/* callers pass smprint results straight in, so s may be nil */
+	if(s == nil){
+		werrstr("memory");
+		return nil;
+	}
+
+	res = 0;
+	if(opts.linkmode == Lresolve && (res = ext4_readlink(s, buf, sizeof(buf), &sz)) == 0){
+		/* a completely filled buffer means the target may be truncated */
+		if(sz == sizeof(buf)){
+			werrstr("readlink: %s: path too long", s);
+			free(s);
+			return nil;
+		}
+
+		buf[sz] = 0;
+		if(value != nil)
+			*value = strdup(buf);
+		cleanname(buf);
+		if(buf[0] == '/'){
+			/* absolute target: relative to the mount point */
+			t = smprint("%M%s", a->p, buf);
+		}else{
+			/* relative target: replace the last element of s */
+			if((q = strrchr(s, '/')) != nil)
+				*q = 0;
+			t = smprint("%s/%s", s, buf);
+			/* clean only the part after the mount point */
+			if(t != nil && (q = strchr(t+1, '/')) != nil)
+				cleanname(q);
+		}
+		free(s);
+		s = t;
+		if(s == nil)
+			werrstr("memory");
+	}else if(res != 0){
+		/* not a link: note why, but hand the path back unchanged */
+		werrstr("readlink: %s: %r", s);
+	}
+
+	return s;
+}
+
+/*
+ * Build the mount-point qualified path of the fid's file and run it
+ * through linkresolve. The result is malloc'd; nil means failure.
+ */
+static char *
+fullpath(Aux *a)
+{
+	char *raw;
+
+	raw = smprint("%M/%s", a->p, a->path);
+	return linkresolve(a, raw, nil);
+}
+
+/*
+ * Check whether the user of fid state a may access its file with open
+ * mode p (OREAD, OWRITE, ORDWR or OEXEC, optionally with OTRUNC).
+ * If inodeout is not nil it receives a copy of the file's inode.
+ *
+ * Returns 1 if access is allowed, 0 if it is denied and -1 with errstr
+ * set if the inode could not be read (inodeout is left untouched then).
+ * Callers must treat -1 as a failure, not as "allowed".
+ */
+static int
+haveperm(Aux *a, int p, struct ext4_inode *inodeout)
+{
+	struct ext4_inode inode;
+	u32int ino, id;
+	int m, fm, trunc;
+	Group *g;
+	char *s;
+
+	/* latch OTRUNC first: the switch replaces p with A* access bits */
+	trunc = p & OTRUNC;
+	switch(p & 3){
+	case OREAD:
+		p = AREAD;
+		break;
+	case OWRITE:
+		p = AWRITE;
+		break;
+	case ORDWR:
+		p = AREAD|AWRITE;
+		break;
+	case OEXEC:
+		p = AEXEC;
+		break;
+	default:
+		return 0;
+	}
+	/* truncating modifies the file, so it needs write permission */
+	if(trunc)
+		p |= AWRITE;
+
+	if((s = fullpath(a)) == nil)
+		return -1;
+	if(ext4_raw_inode_fill(s, &ino, &inode) != 0){
+		werrstr("%s: %r", s);
+		free(s);
+		return -1;
+	}
+	free(s);
+
+	if(inodeout != nil)
+		memmove(inodeout, &inode, sizeof(inode));
+
+	fm = ext4_inode_get_mode(a->p->sb, &inode);
+
+	/* other */
+	m = fm & 7;
+	if((p & m) == p)
+		return 1;
+
+	/* owner; the bits accumulate on top of "other" */
+	id = ext4_inode_get_uid(&inode);
+	if(a->uid == Root || ((g = findgroupid(&a->p->groups, id)) != nil && ingroup(g, a->uid))){
+		m |= (fm >> 6) & 7;
+		if((p & m) == p)
+			return 1;
+	}
+
+	/* group */
+	id = ext4_inode_get_gid(&inode);
+	if(a->uid == Root || ((g = findgroupid(&a->p->groups, id)) != nil && ingroup(g, a->uid))){
+		m |= (fm >> 3) & 7;
+		if((p & m) == p)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Tattach: open the partition named by aname and set up the root fid.
+ * The user gets uid Root when running with -R or when uname is not in
+ * the partition's group table.
+ */
+static void
+rattach(Req *r)
+{
+	char err[ERRMAX];
+	Aux *a;
+
+	a = calloc(1, sizeof(*a));
+	if(a == nil){
+		respond(r, "memory");
+		return;
+	}
+
+	a->p = openpart(r->ifcall.aname, &opts);
+	if(a->p == nil){
+		free(a);
+		rerrstr(err, sizeof(err));
+		respond(r, err);
+		return;
+	}
+
+	if(opts.asroot || findgroup(&a->p->groups, r->ifcall.uname, &a->uid) == nil)
+		a->uid = Root;
+
+	incref(a->p);
+	a->type = Adir;
+	a->path = strdup("");
+	r->fid->aux = a;
+	r->fid->qid = a->p->qidmask;
+	r->ofcall.qid = a->p->qidmask;
+	respond(r, nil);
+}
+
+/*
+ * Translate a 9P open mode and permission word into ext4 open flags.
+ * OTRUNC, the access mode (read-only adds no flag), DMEXCL and
+ * DMAPPEND are mapped; creat adds O_CREAT.
+ */
+static u32int
+toext4mode(u32int mode, u32int perm, int creat)
+{
+	u32int flags;
+
+	flags = (mode & OTRUNC) ? O_TRUNC : 0;
+
+	switch(mode & 3){
+	case OWRITE:
+		flags |= O_WRONLY;
+		break;
+	case ORDWR:
+		flags |= O_RDWR;
+		break;
+	}
+
+	if(creat)
+		flags |= O_CREAT;
+	if(perm & DMEXCL)
+		flags |= O_EXCL;
+	if(perm & DMAPPEND)
+		flags |= O_APPEND;
+
+	return flags;
+}
+
+/*
+ * Topen: open the file or directory the fid refers to. Directories
+ * may only be opened with OREAD. A fid can be opened only once.
+ */
+static void
+ropen(Req *r)
+{
+	char *path;
+	int res;
+	Aux *a;
+
+	a = r->fid->aux;
+	switch(a->type){
+	case Adir:
+		/* haveperm returns -1 on lookup failure; that is a denial too */
+		if(r->ifcall.mode != OREAD || haveperm(a, r->ifcall.mode, nil) <= 0){
+			respond(r, Eperm);
+			return;
+		}
+		if(a->dir != nil){
+			respond(r, "double open");
+			return;
+		}
+		if((a->dir = malloc(sizeof(*a->dir))) == nil)
+			goto Nomem;
+		if((path = smprint("%M/%s", a->p, a->path)) == nil){
+			free(a->dir);
+			a->dir = nil;
+			goto Nomem;
+		}
+		res = ext4_dir_open(a->dir, path);
+		free(path);
+		if(res != 0){
+			free(a->dir);
+			a->dir = nil;
+			responderror(r);
+			return;
+		}
+		break;
+
+	case Afile:
+		/* haveperm returns -1 on lookup failure; that is a denial too */
+		if(haveperm(a, r->ifcall.mode, nil) <= 0){
+			respond(r, Eperm);
+			return;
+		}
+		if(a->file != nil){
+			respond(r, "double open");
+			return;
+		}
+		if((a->file = malloc(sizeof(*a->file))) == nil)
+			goto Nomem;
+		if((path = smprint("%M/%s", a->p, a->path)) == nil){
+			free(a->file);
+			a->file = nil;
+			goto Nomem;
+		}
+		res = ext4_fopen2(a->file, path, toext4mode(r->ifcall.mode, 0, 0));
+		free(path);
+		if(res != 0){
+			free(a->file);
+			a->file = nil;
+			responderror(r);
+			return;
+		}
+		break;
+
+Nomem:
+		respond(r, "memory");
+		return;
+	}
+
+	r->ofcall.iounit = 0;
+
+	respond(r, nil);
+}
+
+/*
+ * Tcreate: create r->ifcall.name in the directory the fid refers to
+ * and leave the fid open on the new file or directory. The caller
+ * needs write permission on the directory. If anything fails the fid
+ * keeps referring to the directory.
+ */
+static void
+rcreate(Req *r)
+{
+	u32int perm, dirperm, t;
+	struct ext4_inode inode;
+	char *s, *q, err[ERRMAX];
+	int mkdir, res;
+	long tm;
+	Aux *a;
+
+	a = r->fid->aux;
+	s = nil;
+
+	if(a->file != nil || a->dir != nil){
+		werrstr("double create");
+		goto error;
+	}
+	/*
+	 * haveperm returns -1 (errstr set, inode not filled in) when the
+	 * lookup fails; that must not be taken for "permission granted".
+	 */
+	if((res = haveperm(a, OWRITE, &inode)) <= 0){
+		if(res == 0)
+			werrstr(Eperm);
+		goto error;
+	}
+
+	/* first make sure this is a directory */
+	t = ext4_inode_type(a->p->sb, &inode);
+	if(t != EXT4_INODE_MODE_DIRECTORY){
+		werrstr("create in non-directory");
+		goto error;
+	}
+
+	if((s = fullpath(a)) == nil)
+		goto error;
+	/* if the mode can't be read nothing is inherited from the directory */
+	dirperm = 0;
+	ext4_mode_get(s, &dirperm);
+
+	/* check if the entry already exists */
+	if((q = smprint("%s/%s", s, r->ifcall.name)) == nil){
+Nomem:
+		werrstr("memory");
+		goto error;
+	}
+	free(s);
+	s = q;
+	cleanname(s);
+	if(ext4_inode_exist(s, EXT4_DE_UNKNOWN) == 0){
+		werrstr("file already exists");
+		goto error;
+	}
+
+	/*
+	 * open(5): a new file gets perm & (~0666 | (dir.perm & 0666)),
+	 * a new directory perm & (~0777 | (dir.perm & 0777)).
+	 */
+	mkdir = r->ifcall.perm & DMDIR;
+	perm = mkdir ? 0777 : 0666;
+	perm = r->ifcall.perm & (~perm | (dirperm & perm));
+
+	if(mkdir){
+		if(ext4_dir_mk(s) != 0)
+			goto error;
+		/* from here on a failure has to remove the directory again */
+		if((a->dir = malloc(sizeof(*a->dir))) == nil){
+			werrstr("memory");
+			goto ext4errorrm;
+		}
+		if(ext4_dir_open(a->dir, s) < 0){
+			free(a->dir);
+			a->dir = nil;
+			goto ext4errorrm;
+		}
+	}else{
+		if((a->file = malloc(sizeof(*a->file))) == nil)
+			goto Nomem;
+		if(ext4_fopen2(a->file, s, toext4mode(r->ifcall.mode, perm, 1)) < 0){
+			free(a->file);
+			a->file = nil;
+			goto error;
+		}
+	}
+
+	if(ext4_mode_set(s, perm) < 0)
+		goto ext4errorrm;
+	ext4_owner_set(s, a->uid, a->uid);
+	tm = time(nil);
+	ext4_mtime_set(s, tm);
+	ext4_ctime_set(s, tm);
+
+	/* success: only now does the fid switch over to the new object */
+	a->type = mkdir ? Adir : Afile;
+	r->fid->qid.path = a->p->qidmask.path | (mkdir ? a->dir->f.inode : a->file->inode);
+	r->fid->qid.vers = 0;
+	r->fid->qid.type = mkdir ? QTDIR : 0;
+	r->ofcall.qid = r->fid->qid;
+
+	free(a->path);
+	a->path = strdup(strchr(s+1, '/')+1);
+	free(s);
+	r->ofcall.iounit = 0;
+	respond(r, nil);
+	return;
+
+ext4errorrm:
+	/* the cleanup calls may overwrite errstr; keep the original error */
+	rerrstr(err, sizeof(err));
+	if(mkdir){
+		if(a->dir != nil){
+			ext4_dir_close(a->dir);
+			free(a->dir);
+			a->dir = nil;
+		}
+		ext4_dir_rm(s);
+	}else{
+		if(a->file != nil){
+			ext4_fclose(a->file);
+			free(a->file);
+			a->file = nil;
+		}
+		ext4_fremove(s);
+	}
+	werrstr("%s", err);
+error:
+	free(s);
+	responderror(r);
+}
+
+/*
+ * Fill dir with the stat data of a file. With path == nil the fid's
+ * own file (a->path) is described; otherwise path names an entry in
+ * the directory a->path. The path is passed through linkresolve first.
+ * Returns 0 on success and -1 on failure; on failure dir holds no
+ * allocated strings.
+ */
+static int
+dirfill(Dir *dir, Aux *a, char *path)
+{
+	struct ext4_inode inode;
+	u32int t, ino, id;
+	char tmp[16];
+	char *s, *q;
+	Group *g;
+
+	memset(dir, 0, sizeof(*dir));
+
+	if(path == nil){
+		path = a->path;
+		s = smprint("%M/%s", a->p, a->path);
+	}else{
+		if(*a->path == 0 && *path == 0)
+			path = "/";
+		s = smprint("%M%s%s/%s", a->p, *a->path ? "/" : "", a->path, path);
+	}
+	/* linkresolve takes ownership of s and returns the path to stat */
+	if((s = linkresolve(a, s, nil)) == nil)
+		return -1;
+	if(ext4_raw_inode_fill(s, &ino, &inode) < 0){
+		werrstr("inode: %s: %r", s);
+		free(s);
+		return -1;
+	}
+
+	t = ext4_inode_type(a->p->sb, &inode);
+	if(opts.linkmode == Lhide && t == EXT4_INODE_MODE_SOFTLINK){
+		werrstr("softlinks resolving disabled");
+		free(s);
+		return -1;
+	}
+
+	/* only the nine rwx permission bits are passed on */
+	dir->mode = ext4_inode_get_mode(a->p->sb, &inode) & 0x1ff;
+	dir->qid.path = a->p->qidmask.path | ino;
+	dir->qid.vers = ext4_inode_get_generation(&inode);
+	dir->qid.type = 0;
+	if(t == EXT4_INODE_MODE_DIRECTORY){
+		dir->qid.type |= QTDIR;
+		dir->mode |= DMDIR;
+	}else
+		dir->length = ext4_inode_get_size(a->p->sb, &inode);
+	if(ext4_inode_get_flags(&inode) & EXT4_INODE_FLAG_APPEND){
+		dir->qid.type |= QTAPPEND;
+		dir->mode |= DMAPPEND;
+	}
+
+	/* the name is the last element of the requested (unresolved) path */
+	if((q = strrchr(path, '/')) != nil)
+		path = q+1;
+	dir->name = estrdup9p(path);
+	dir->atime = ext4_inode_get_access_time(&inode);
+	dir->mtime = ext4_inode_get_modif_time(&inode);
+
+	/* ids without an entry in the group table are reported as numbers */
+	sprint(tmp, "%ud", id = ext4_inode_get_uid(&inode));
+	dir->uid = estrdup9p((g = findgroupid(&a->p->groups, id)) != nil ? g->name : tmp);
+
+	sprint(tmp, "%ud", id = ext4_inode_get_gid(&inode));
+	dir->gid = estrdup9p((g = findgroupid(&a->p->groups, id)) != nil ? g->name : tmp);
+
+	free(s);
+
+	return 0;
+}
+
+/*
+ * Generator for dirread9p: produce the n-th visible entry of the open
+ * directory of aux (an Aux). a->doff counts the visible entries passed
+ * so far, so a call with n == a->doff continues where the previous one
+ * stopped; any other n (and n == 0) rewinds and scans from the start.
+ * Returns 0 with dir filled in, or -1 when the entries run out.
+ */
+static int
+dirgen(int n, Dir *dir, void *aux)
+{
+	const ext4_direntry *e;
+	Aux *a;
+
+	a = aux;
+	if(n == 0 || n != a->doff){
+		ext4_dir_entry_rewind(a->dir);
+		a->doff = 0;
+	}
+
+	for(;;){
+		/* skip nameless entries and the "." and ".." entries */
+		do{
+			if((e = ext4_dir_entry_next(a->dir)) == nil)
+				return -1;
+		}while(e->name == nil || strcmp((char*)e->name, ".") == 0 || strcmp((char*)e->name, "..") == 0);
+
+		/* hidden links do not count as entries at all */
+		if(opts.linkmode == Lhide && e->inode_type == EXT4_DE_SYMLINK)
+			continue;
+
+		if(a->doff++ != n)
+			continue;
+
+		if(dirfill(dir, a, (char*)e->name) == 0)
+			return 0;
+
+		/* entry could not be described: uncount it and try the next one */
+		a->doff--;
+	}
+}
+
+/*
+ * Tread: directories are served through dirread9p/dirgen; files are
+ * read at the requested offset. A failed seek yields a zero-length
+ * read rather than an error.
+ */
+static void
+rread(Req *r)
+{
+	usize n;
+	Aux *a;
+
+	a = r->fid->aux;
+	if(a->type == Adir){
+		if(a->dir != nil)
+			dirread9p(r, dirgen, a);
+	}else if(a->type == Afile && a->file != nil){
+		n = 0;
+		if(ext4_fseek(a->file, r->ifcall.offset, 0) == 0
+		&& ext4_fread(a->file, r->ofcall.data, r->ifcall.count, &n) < 0){
+			responderror(r);
+			return;
+		}
+		r->ofcall.count = n;
+	}
+
+	respond(r, nil);
+}
+
+/*
+ * Twrite: write the request data at the requested offset. If the
+ * offset lies beyond the end of the file the gap is filled with zeros
+ * first, at most sizeof(zero) bytes per write.
+ */
+static void
+rwrite(Req *r)
+{
+	usize n, sz;
+	Aux *a;
+
+	a = r->fid->aux;
+	if(a->type == Adir){
+		respond(r, "can't write to dir");
+		return;
+	}
+	if(a->type != Afile)
+		return;
+
+	/* grow the file with zeros until it reaches the write offset */
+	while(ext4_fsize(a->file) < r->ifcall.offset){
+		ext4_fseek(a->file, 0, 2);
+		sz = MIN(r->ifcall.offset-ext4_fsize(a->file), sizeof(zero));
+		if(ext4_fwrite(a->file, zero, sz, &n) < 0){
+			responderror(r);
+			return;
+		}
+	}
+
+	if(ext4_fseek(a->file, r->ifcall.offset, 0) < 0
+	|| ext4_fwrite(a->file, r->ifcall.data, r->ifcall.count, &n) < 0){
+		responderror(r);
+		return;
+	}
+
+	r->ofcall.count = n;
+	respond(r, nil);
+}
+
+/*
+ * Tremove: remove the file or (empty) directory the fid refers to.
+ * Allowed for Root, and for a user whose id equals the file's uid
+ * provided that uid has an entry in the group table.
+ */
+static void
+rremove(Req *r)
+{
+	struct ext4_inode inode;
+	const ext4_direntry *e;
+	u32int ino, t, empty;
+	ext4_dir dir;
+	Group *g;
+	char *s;
+	Aux *a;
+
+	a = r->fid->aux;
+
+	/* do not resolve links here as most likely it's JUST the link we want to remove */
+	if((s = smprint("%M/%s", a->p, a->path)) == nil){
+		werrstr("memory");
+		goto error;
+	}
+	if(ext4_raw_inode_fill(s, &ino, &inode) < 0)
+		goto error;
+
+	if(a->uid == Root || ((g = findgroupid(&a->p->groups, ext4_inode_get_uid(&inode))) != nil && g->id == a->uid)){
+		t = ext4_inode_type(a->p->sb, &inode);
+		if(t == EXT4_INODE_MODE_DIRECTORY && ext4_dir_open(&dir, s) == 0){
+			/* scan until the first entry that is not nameless, "." or ".." */
+			for(empty = 1; empty;){
+				if((e = ext4_dir_entry_next(&dir)) == nil)
+					break;
+				empty = e->name == nil || strcmp((char*)e->name, ".") == 0 || strcmp((char*)e->name, "..") == 0;
+			}
+			ext4_dir_close(&dir);
+			if(!empty){
+				werrstr("directory not empty");
+				goto error;
+			}else if(ext4_dir_rm(s) < 0)
+				goto error;
+		/* NOTE(review): a directory that fails to open also lands here */
+		}else if(ext4_fremove(s) < 0)
+			goto error;
+	}else{
+		werrstr(Eperm);
+		goto error;
+	}
+
+	free(s);
+	respond(r, nil);
+	return;
+
+error:
+	free(s);
+	responderror(r);
+}
+
+/* Tstat: describe the fid's own file through dirfill. */
+static void
+rstat(Req *r)
+{
+	if(dirfill(&r->d, r->fid->aux, nil) == 0){
+		respond(r, nil);
+		return;
+	}
+	responderror(r);
+}
+
+/*
+ * Twstat: rename, truncate, or change the mode, mtime or group of the
+ * fid's file. All permission checks are made before anything is
+ * changed; the changes themselves are then applied one after another
+ * and are not rolled back if a later one fails.
+ *
+ * The root directory can't be changed and the owner can't be changed.
+ */
+static void
+rwstat(Req *r)
+{
+	int res, isdir, wrperm, isowner, dorename, n;
+	struct ext4_inode inode;
+	char *old, *new, *parent, *s;
+	u32int uid, gid;
+	ext4_file f;
+	Aux *a, o;
+	Group *g;
+
+	a = r->fid->aux;
+	old = nil;	/* current full path of the file */
+	new = nil;	/* full path after the rename, if one is requested */
+	parent = nil;	/* mount-relative path of the containing directory */
+	dorename = r->d.name != nil && r->d.name[0] != 0;
+
+	/* can't do anything to root, can't change the owner */
+	if(a->path[0] == 0 || (r->d.uid != nil && r->d.uid[0] != 0)){
+		werrstr(Eperm);
+		goto error;
+	}
+
+	if((old = smprint("%M/%s", a->p, a->path)) == nil){
+		werrstr("memory");
+		goto error;
+	}
+
+	/* haveperm returns -1 on lookup failure and leaves inode unset then */
+	if((wrperm = haveperm(a, OWRITE, &inode)) < 0)
+		goto error;
+	uid = ext4_inode_get_uid(&inode);
+	/* it is the requesting user, not the file's owner, that may be Root */
+	isowner = a->uid == Root || a->uid == uid;
+
+	/* permission to truncate */
+	isdir = ext4_inode_type(a->p->sb, &inode) == EXT4_INODE_MODE_DIRECTORY;
+	if(r->d.length >= 0 && (!wrperm || isdir || !ext4_inode_can_truncate(a->p->sb, &inode))){
+		werrstr(Eperm);
+		goto error;
+	}
+
+	/* permission to rename */
+	if(dorename){
+		if((s = strrchr(old, '/')) == nil){
+			werrstr("botched name");
+			goto error;
+		}
+		n = s - old;
+		if((new = malloc(n + 1 + strlen(r->d.name) + 1)) == nil){
+			werrstr("memory");
+			goto error;
+		}
+		memmove(new, old, n);
+		new[n++] = '/';
+		strcpy(new+n, r->d.name);
+
+		/*
+		 * check parent write permission; haveperm wants a path relative
+		 * to the mount point, so strip the last element of a->path
+		 * ("" is the root directory)
+		 */
+		if((parent = strdup(a->path)) == nil){
+			werrstr("memory");
+			goto error;
+		}
+		if((s = strrchr(parent, '/')) != nil)
+			*s = 0;
+		else
+			parent[0] = 0;
+		o = *a;
+		o.path = parent;
+		if(haveperm(&o, OWRITE, nil) <= 0){
+			werrstr(Eperm);
+			goto error;
+		}
+	}
+
+	/* permission to change mode */
+	if(r->d.mode != ~0){
+		/* has to be owner and can't change dir bit */
+		if(!isowner || (!!isdir != !!(r->d.mode & DMDIR))){
+			werrstr(Eperm);
+			goto error;
+		}
+	}
+
+	/* permission to change mtime */
+	if(r->d.mtime != ~0 && !isowner){
+		werrstr(Eperm);
+		goto error;
+	}
+
+	/* permission to change gid */
+	if(r->d.gid != nil && r->d.gid[0] != 0){
+		/* has to be the owner, group has to exist, must be in that group */
+		if(!isowner || (g = findgroup(&a->p->groups, r->d.gid, &gid)) == nil || !ingroup(g, a->uid)){
+			werrstr(Eperm);
+			goto error;
+		}
+	}
+
+	/* done checking permissions, now apply all the changes and hope it all works */
+
+	/* rename; afterwards old holds the new name for the steps below */
+	if(dorename){
+		if(ext4_frename(old, new) < 0)
+			goto error;
+
+		free(old);
+		old = new;
+		new = nil;
+
+		free(a->path);
+		a->path = strdup(strchr(old+1, '/')+1);
+	}
+
+	/* truncate */
+	if(r->d.length >= 0){
+		if(ext4_fopen2(&f, old, toext4mode(OWRITE, 0, 0)) < 0)
+			goto error;
+		res = ext4_ftruncate(&f, r->d.length);
+		ext4_fclose(&f);
+		if(res != 0)
+			goto error;
+	}
+
+	/* mode */
+	if(r->d.mode != ~0 && ext4_mode_set(old, r->d.mode & 0x1ff) < 0)
+		goto error;
+
+	/* mtime */
+	if(r->d.mtime != ~0 && ext4_mtime_set(old, r->d.mtime) < 0)
+		goto error;
+
+	/* gid */
+	if(r->d.gid != nil && r->d.gid[0] != 0 && ext4_owner_set(old, uid, gid) < 0)
+		goto error;
+
+	free(old);
+	free(new);
+	free(parent);
+	respond(r, nil);
+	return;
+
+error:
+	free(old);
+	free(new);
+	free(parent);
+	responderror(r);
+}
+
+/*
+ * Walk fid one step to name. The current file has to be a directory
+ * the user may search (OEXEC). On success the fid's path, type and qid
+ * are updated, *qid is filled in and nil is returned; otherwise an
+ * error string is returned and the fid is left unchanged.
+ */
+static char *
+rwalk1(Fid *fid, char *name, Qid *qid)
+{
+	static char errbuf[ERRMAX];
+	struct ext4_inode inode;
+	u32int ino, t;
+	Aux *a, dir;
+	char *s, *q;
+
+	a = fid->aux;
+
+	/* try walking to the real file first */
+	if((s = fullpath(a)) == nil){
+		/* else try link itself. might want to just remove it anyway */
+		if((s = smprint("%M/%s", a->p, a->path)) == nil)
+			return "memory";
+	}
+	if(ext4_raw_inode_fill(s, &ino, &inode) < 0)
+		goto error;
+	t = ext4_inode_type(a->p->sb, &inode);
+	if(t != EXT4_INODE_MODE_DIRECTORY){
+		free(s);
+		return "not a directory";
+	}
+	/* check search permission on the (resolved) directory */
+	dir = *a;
+	dir.path = strchr(s+1, '/')+1;
+	/* haveperm returns -1 on lookup failure; that is a denial too */
+	if(haveperm(&dir, OEXEC, nil) <= 0){
+		free(s);
+		return Eperm;
+	}
+
+	q = s;
+	s = smprint("%s/%s", q, name);
+	free(q);
+	if(s == nil)
+		return "memory";
+	cleanname(strchr(s+1, '/'));
+	/*
+	 * linkresolve has already freed its argument whenever it returns
+	 * nil, so s must not be freed a second time on that path
+	 */
+	if((s = linkresolve(a, s, nil)) == nil)
+		goto error;
+	if(ext4_raw_inode_fill(s, &ino, &inode) < 0)
+		goto error;
+	t = ext4_inode_type(a->p->sb, &inode);
+	if(opts.linkmode == Lhide && t == EXT4_INODE_MODE_SOFTLINK){
+		free(s);
+		return "not found";
+	}
+	qid->type = 0;
+	qid->path = a->p->qidmask.path | ino;
+	qid->vers = ext4_inode_get_generation(&inode);
+	if(t == EXT4_INODE_MODE_DIRECTORY){
+		qid->type |= QTDIR;
+		a->type = Adir;
+	}else
+		a->type = Afile;
+	if(ext4_inode_get_flags(&inode) & EXT4_INODE_FLAG_APPEND)
+		qid->type |= QTAPPEND;
+	free(a->path);
+	a->path = strdup(strchr(s+1, '/')+1);
+	free(s);
+	fid->qid = *qid;
+
+	return nil;
+
+error:
+	free(s);
+	rerrstr(errbuf, sizeof(errbuf));
+	return errbuf;
+}
+
+/*
+ * Give newfid its own copy of oldfid's state. The copy gets a private
+ * path string and no open handle, and takes another reference on the
+ * partition. Returns nil on success or an error string.
+ */
+static char *
+rclone(Fid *oldfid, Fid *newfid)
+{
+	Aux *src, *dst;
+
+	src = oldfid->aux;
+	dst = calloc(1, sizeof(*dst));
+	if(dst == nil)
+		return "memory";
+
+	*dst = *src;
+	dst->path = strdup(src->path);
+	/* the open handle stays with the original fid */
+	dst->file = nil;
+	dst->dir = nil;
+
+	incref(dst->p);
+	newfid->aux = dst;
+
+	return nil;
+}
+
+/*
+ * Release everything a fid owns: close and free the open handle, drop
+ * the partition reference (closing the partition with the last one)
+ * and free the path and the Aux itself.
+ */
+static void
+rdestroyfid(Fid *fid)
+{
+	Aux *a;
+
+	if((a = fid->aux) == nil)
+		return;
+	fid->aux = nil;
+
+	switch(a->type){
+	case Adir:
+		if(a->dir != nil){
+			ext4_dir_close(a->dir);
+			free(a->dir);
+		}
+		break;
+	case Afile:
+		if(a->file != nil){
+			ext4_fclose(a->file);
+			free(a->file);
+		}
+		break;
+	}
+
+	if(decref(a->p) < 1)
+		closepart(a->p);
+	free(a->path);
+	free(a);
+}
+
+/*
+ * Note handler: notes starting with "sys:" are declined (0); for any
+ * other note all partitions are closed, fd 0 is closed and 1 is
+ * returned.
+ */
+static int
+note(void *, char *s)
+{
+	if(strncmp(s, "sys:", 4) == 0)
+		return 0;
+
+	closeallparts();
+	close(0);
+	return 1;
+}
+
+/*
+ * Command proc: posts one end of a pipe as #s/<srvname>.cmd and reads
+ * commands, one per line, from the other end. Known commands are
+ * "stats"/"df", "halt" and "sync".
+ */
+static void
+cmdsrv(void *)
+{
+	char s[32], *c, *a[4];
+	int f, p[2], n;
+	Biobuf b;
+
+	if(pipe(p) < 0)
+		sysfatal("%r");
+	snprint(s, sizeof(s), "#s/%s.cmd", srvname);
+	f = create(s, ORCLOSE|OWRITE, 0660);
+	if(f < 0){
+		/* a stale entry may be in the way: remove it and retry once */
+		remove(s);
+		f = create(s, ORCLOSE|OWRITE, 0660);
+		if(f < 0)
+			sysfatal("%r");
+	}
+	if(fprint(f, "%d", p[0]) < 1)
+		sysfatal("srv write");
+
+	/* keep only our end of the pipe, as fd 0 */
+	dup(p[1], 0);
+	close(p[1]);
+	close(p[0]);
+
+	Binit(&b, 0, OREAD);
+	while((c = Brdstr(&b, '\n', 1)) != nil){
+		if((n = tokenize(c, a, nelem(a))) >= 1){
+			USED(n);
+			if(strcmp(a[0], "stats") == 0 || strcmp(a[0], "df") == 0)
+				statallparts();
+			else if(strcmp(a[0], "halt") == 0){
+				closeallparts();
+				close(0);
+				threadexitsall(nil);
+			}else if(strcmp(a[0], "sync") == 0)
+				syncallparts();
+			else
+				print("unknown command: %s\n", a[0]);
+		}
+		free(c);
+	}
+}
+
+/* Srv start hook: install the note handler and start the command proc. */
+static void
+rstart(Srv *)
+{
+	threadnotify(note, 1);
+	proccreate(cmdsrv, nil, mainstacksize);
+}
+
+/* Srv end hook: close every partition and fd 0, then exit all procs. */
+static void
+rend(Srv *)
+{
+	closeallparts();
+	close(0);
+	threadexitsall(nil);
+}
+
+/* 9P handler table, passed to threadsrv/threadpostsrv by threadmain */
+static Srv fs = {
+	.attach = rattach,
+	.open = ropen,
+	.create = rcreate,
+	.read = rread,
+	.write = rwrite,
+	.remove = rremove,
+	.stat = rstat,
+	.wstat = rwstat,
+	.walk1 = rwalk1,
+	.clone = rclone,
+	.destroyfid = rdestroyfid,
+	.start = rstart,
+	.end = rend,
+};
+
+/* Print both invocation forms (serve and mkfs) to fd 2 and exit. */
+static void
+usage(void)
+{
+	fprint(2, "usage: %s [-Clrs] [-g groupfile] [-R uid] [srvname]\n", argv0);
+	fprint(2, "mkfs:  %s -M (2|3|4) [-L label] [-b blksize] [-N numinodes] [-I inodesize] device\n", argv0);
+	threadexitsall("usage");
+}
+
+/*
+ * Entry point. The program either serves file systems over 9P or, with
+ * -M, formats a device. opts.fstype tracks which of the two it is:
+ * -1 undecided, 0 serving, 2..4 the ext version to create. Serve-only
+ * options jump to nomkfs, mkfs-only options to yesmkfs, and mixing the
+ * two kinds ends in usage().
+ */
+void
+threadmain(int argc, char **argv)
+{
+	char *gr;
+	vlong sz;
+	int f, stdio;
+
+	rfork(RFNOTEG);
+
+	stdio = 0;
+	ARGBEGIN{
+	case 'D':
+		chatty9p++;
+nomkfs:
+		/* serve-only option: refuse it once -M has been given */
+		if(opts.fstype > 0)
+			usage();
+		opts.fstype = 0;
+		break;
+	case 'd':
+		ext4_dmask_set(strtoul(EARGF(usage()), nil, 0));
+		break;
+	case 'C':
+		opts.cachewb = 1;
+		goto nomkfs;
+	case 'l':
+		opts.linkmode = Lresolve;
+		goto nomkfs;
+	case 'g':
+		/* read the whole group file into a NUL-terminated buffer */
+		gr = EARGF(usage());
+		if((f = open(gr, OREAD)) < 0)
+			sysfatal("%r");
+		sz = seek(f, 0, 2);
+		if(sz < 0)
+			sysfatal("%s: invalid group file", gr);
+		if((opts.group = malloc(sz+1)) == nil)
+			sysfatal("memory");
+		seek(f, 0, 0);
+		if(readn(f, opts.group, sz) != sz)
+			sysfatal("%s: read failed", gr);
+		close(f);
+		opts.group[sz] = 0;
+		goto nomkfs;
+	case 'R':
+		opts.asroot = 1;
+		Root = atoll(EARGF(usage()));
+		goto nomkfs;
+	case 'r':
+		opts.rdonly = 1;
+		goto nomkfs;
+	case 's':
+		stdio = 1;
+		goto nomkfs;
+	case 'M':
+		/* fstype == 0 means a serve-only option came first */
+		if(!opts.fstype)
+			usage();
+		opts.fstype = atoi(EARGF(usage()));
+		if(opts.fstype < 2 || opts.fstype > 4)
+			usage();
+		break;
+
+	case 'b':
+		opts.blksz = atoi(EARGF(usage()));
+		if(opts.blksz != 1024 && opts.blksz != 2048 && opts.blksz != 4096)
+			usage();
+yesmkfs:
+		/* mkfs-only option: -M has to precede it */
+		if(opts.fstype < 1)
+			usage();
+		break;
+	case 'L':
+		opts.label = EARGF(usage());
+		goto yesmkfs;
+	case 'I':
+		/* at least 128 and a power of two */
+		opts.inodesz = atoi(EARGF(usage()));
+		if(opts.inodesz < 128 || ((opts.inodesz-1) & opts.inodesz) != 0)
+			usage();
+		goto yesmkfs;
+	case 'N':
+		opts.ninode = atoi(EARGF(usage()));
+		if(opts.ninode < 1)
+			usage();
+		goto yesmkfs;
+
+	default:
+		usage();
+	}ARGEND
+
+	if(opts.fstype > 1){
+		/* mkfs: exactly one argument, the device; openpart gets the mkfs options */
+		if(argc != 1)
+			usage();
+		if(openpart(argv[0], &opts) == nil)
+			sysfatal("%r");
+		closeallparts();
+		threadexitsall(nil);
+	}else{
+		/* serve: an optional service name, not allowed together with -s */
+		if(!stdio && argc == 1)
+			srvname = *argv;
+		else if(argc != 0)
+			usage();
+
+		if(stdio){
+			fs.infd = 0;
+			fs.outfd = 1;
+			threadsrv(&fs);
+		}else
+			threadpostsrv(&fs, srvname);
+		threadexits(nil);
+	}
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/group.c
@@ -1,0 +1,130 @@
+#include <u.h>
+#include <libc.h>
+#include "group.h"
+
+/*
+ * Parse raw, the text of a group file, into gs. Each line consists of
+ * ':'-separated fields: field 0 is the name, field 2 the numeric id
+ * and field 3, if present, a comma-separated member list. Names point
+ * into gs->raw, a private copy of raw that is cut up in place.
+ * Once all lines are read, every member name is looked up among the
+ * groups to fill in its id (~0 if there is no such group).
+ * Returns 0 on success; on failure returns -1 with errstr set and gs
+ * already released through freegroups.
+ */
+int
+loadgroups(Groups *gs, char *raw)
+{
+	char *m, *s, *e, *a[5], *ide;
+	Group *g, *memb;
+	int line, n, k;
+	vlong id;
+
+	memset(gs, 0, sizeof(*gs));
+	if((gs->raw = strdup(raw)) == nil)
+		goto error;
+
+	line = 1;
+	for(s = gs->raw; *s; s = e+1, line++){
+		/* terminate the current line in place */
+		if((e = strchr(s, '\n')) != nil)
+			*e = 0;
+
+		if((n = getfields(s, a, nelem(a), 1, ":")) >= 3 && strlen(a[0]) > 0 && strlen(a[2]) > 0){
+			/* the id has to be a complete number that fits in 32 bits */
+			id = strtoll(a[2], &ide, 0);
+			if(id < 0 || id > 0xffffffff || *ide != 0){
+				werrstr("invalid uid: %s", a[2]);
+				goto error;
+			}
+
+			if((g = realloc(gs->g, (gs->ng+1)*sizeof(Group))) == nil)
+				goto error;
+			gs->g = g;
+			g += gs->ng++;
+			memset(g, 0, sizeof(*g));
+			g->id = id;
+			g->name = a[0];
+			/* split the member list at each comma, one entry per name */
+			for(m = a[3]; n > 3 && *m; *m++ = 0){
+				if((memb = realloc(g->memb, (g->nmemb+1)*sizeof(Group))) == nil)
+					goto error;
+				g->memb = memb;
+				memb += g->nmemb++;
+				memset(memb, 0, sizeof(*memb));
+				memb->name = m;
+				if((m = strchr(m, ',')) == nil)
+					break;
+			}
+		}else{
+			werrstr("line %d: invalid record", line);
+			goto error;
+		}
+
+		/* last line had no newline: e+1 must not be formed */
+		if(e == nil)
+			break;
+	}
+
+	/* second pass: members may name groups defined on later lines */
+	g = gs->g;
+	for(n = 0; n < gs->ng; n++, g++){
+		for(k = 0, memb = g->memb; k < g->nmemb; k++, memb++)
+			findgroup(gs, memb->name, &memb->id);
+	}
+
+	return 0;
+error:
+	werrstr("togroups: %r");
+	freegroups(gs);
+
+	return -1;
+}
+
+/*
+ * Release everything gs owns and reset it to the empty state, so that
+ * calling freegroups again on the same Groups (for instance after
+ * loadgroups has already cleaned up on failure) is harmless.
+ * The Groups structure itself is not freed.
+ */
+void
+freegroups(Groups *gs)
+{
+	int i;
+
+	for(i = 0; i < gs->ng; i++)
+		free(gs->g[i].memb);
+	free(gs->g);
+	free(gs->raw);
+
+	/* leave no dangling pointers or stale count behind */
+	gs->g = nil;
+	gs->ng = 0;
+	gs->raw = nil;
+}
+
+/*
+ * Look a group up by name. Returns the group, or nil if there is none.
+ * If id is not nil it receives the group's id, or ~0 on a miss.
+ */
+Group *
+findgroup(Groups *gs, char *name, u32int *id)
+{
+	Group *hit;
+	int i;
+
+	hit = nil;
+	for(i = 0; i < gs->ng; i++){
+		if(strcmp(gs->g[i].name, name) == 0){
+			hit = &gs->g[i];
+			break;
+		}
+	}
+
+	if(id != nil)
+		*id = hit != nil ? hit->id : ~0;
+
+	return hit;
+}
+
+/* Look a group up by numeric id; returns nil if there is none. */
+Group *
+findgroupid(Groups *gs, u32int id)
+{
+	int i;
+
+	for(i = 0; i < gs->ng; i++){
+		if(gs->g[i].id == id)
+			return &gs->g[i];
+	}
+
+	return nil;
+}
+
+/*
+ * Returns 1 if id is the group's own id or the id of one of its
+ * members, 0 otherwise.
+ */
+int
+ingroup(Group *g, u32int id)
+{
+	int k;
+
+	if(g->id == id)
+		return 1;
+
+	for(k = 0; k < g->nmemb; k++){
+		if(g->memb[k].id == id)
+			return 1;
+	}
+
+	return 0;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/group.h
@@ -1,0 +1,21 @@
+typedef struct Group Group;
+typedef struct Groups Groups;
+
+/* one group-file record; also used for the entries of a member list */
+struct Group {
+	u32int id;	/* numeric id; ~0 for a member naming no known group */
+	char *name;	/* points into Groups.raw, not separately allocated */
+	Group *memb;	/* malloc'd array of members, nil if there are none */
+	int nmemb;	/* number of entries in memb */
+};
+
+/* a parsed group file, filled by loadgroups and released by freegroups */
+struct Groups {
+	char *raw;	/* private copy of the file text, cut up in place */
+	Group *g;	/* malloc'd array of groups */
+	int ng;		/* number of entries in g */
+};
+
+/* parse group file text into gs; 0 on success, -1 with errstr set */
+int loadgroups(Groups *gs, char *raw);
+/* free what gs owns (not gs itself) */
+void freegroups(Groups *gs);
+/* find by name; *id (if id is not nil) gets the group's id, or ~0 if not found */
+Group *findgroup(Groups *gs, char *name, u32int *id);
+/* find by numeric id; nil if not found */
+Group *findgroupid(Groups *gs, u32int id);
+/* 1 if id is g's own id or one of its members' ids, else 0 */
+int ingroup(Group *g, u32int id);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4.h
@@ -1,0 +1,529 @@
+#pragma once
+
+#include "ext4_types.h"
+#include "ext4_debug.h"
+#include "ext4_blockdev.h"
+
+#pragma incomplete struct ext4_mountpoint
+
+/********************************OS LOCK INTERFACE***************************/
+
+/**@brief   OS dependent lock interface.*/
+struct ext4_lock {
+
+	/**@brief   Lock access to mount point.*/
+	void (*lock)(void *aux);
+
+	/**@brief   Unlock access to mount point.*/
+	void (*unlock)(void *aux);
+
+	/**@brief   Auxiliary pointer.*/
+	void *p_user;
+};
+
+/********************************FILE DESCRIPTOR*****************************/
+
+/**@brief   File descriptor. */
+typedef struct ext4_file {
+
+	/**@brief   Mount point handle.*/
+	struct ext4_mountpoint *mp;
+
+	/**@brief   File inode id.*/
+	u32int inode;
+
+	/**@brief   Open flags.*/
+	u32int flags;
+
+	/**@brief   File size.*/
+	u64int fsize;
+
+	/**@brief   Actual file position.*/
+	u64int fpos;
+} ext4_file;
+
+/*****************************DIRECTORY DESCRIPTOR***************************/
+
+/**@brief   Directory entry descriptor. */
+typedef struct ext4_direntry {
+	u32int inode;
+	u16int entry_length;
+	u8int name_length;
+	u8int inode_type;
+	u8int name[255];
+} ext4_direntry;
+
+/**@brief   Directory descriptor. */
+typedef struct ext4_dir {
+	/**@brief   File descriptor.*/
+	ext4_file f;
+	/**@brief   Current directory entry.*/
+	ext4_direntry de;
+	/**@brief   Next entry offset.*/
+	u64int next_off;
+} ext4_dir;
+
+/********************************MOUNT OPERATIONS****************************/
+
+/**@brief   Register block device.
+ *
+ * @param   bd Block device.
+ * @param   dev_name Block device name.
+ *
+ * @return  Standard error code.*/
+int ext4_device_register(struct ext4_blockdev *bd,
+			 const char *dev_name);
+
+/**@brief   Un-register block device.
+ *
+ * @param   dev_name Block device name.
+ *
+ * @return  Standard error code.*/
+int ext4_device_unregister(const char *dev_name);
+
+/**@brief   Un-register all block devices.
+ *
+ * @return  Standard error code.*/
+int ext4_device_unregister_all(void);
+
+/**@brief   Mount a block device with EXT4 partition to the mount point.
+ *
+ * @param   dev_name Block device name (@ref ext4_device_register).
+ * @param   mount_point Mount point, for example:
+ *          -   /
+ *          -   /my_partition/
+ *          -   /my_second_partition/
+ * @param   read_only mount as read-only mode.
+ *
+ * @return Standard error code */
+int ext4_mount(const char *dev_name,
+	       const char *mount_point,
+	       bool read_only);
+
+/**@brief   Umount operation.
+ *
+ * @param   mount_point Mount point.
+ *
+ * @return  Standard error code */
+int ext4_umount(const char *mount_point);
+
+/**@brief   Starts journaling. Journaling start/stop functions are transparent
+ *          and might be used on filesystems without journaling support.
+ * @warning Usage:
+ *              ext4_mount("sda1", "/");
+ *              ext4_journal_start("/");
+ *
+ *              //File operations here...
+ *
+ *              ext4_journal_stop("/");
+ *              ext4_umount("/");
+ * @param   mount_point Mount point.
+ *
+ * @return  Standard error code. */
+int ext4_journal_start(const char *mount_point);
+
+/**@brief   Stops journaling. Journaling start/stop functions are transparent
+ *          and might be used on filesystems without journaling support.
+ *
+ * @param   mount_point Mount point name.
+ *
+ * @return  Standard error code. */
+int ext4_journal_stop(const char *mount_point);
+
+/**@brief   Journal recovery.
+ * @warning Must be called after @ref ext4_mount.
+ *
+ * @param   mount_point Mount point.
+ *
+ * @return Standard error code. */
+int ext4_recover(const char *mount_point);
+
+/**@brief   Some of the filesystem stats. */
+struct ext4_mount_stats {
+	u32int inodes_count;
+	u32int free_inodes_count;
+	u64int blocks_count;
+	u64int free_blocks_count;
+
+	u32int block_size;
+	u32int block_group_count;
+	u32int blocks_per_group;
+	u32int inodes_per_group;
+
+	char volume_name[16];
+};
+
+/**@brief   Get file mount point stats.
+ *
+ * @param   mount_point Mount point.
+ * @param   stats Filesystem stats.
+ *
+ * @return Standard error code. */
+int ext4_mount_point_stats(const char *mount_point,
+			   struct ext4_mount_stats *stats);
+
+/**@brief   Setup OS lock routines.
+ *
+ * @param   mount_point Mount point.
+ * @param   locks  Lock and unlock functions
+ *
+ * @return Standard error code. */
+int ext4_mount_setup_locks(const char *mount_point,
+			   const struct ext4_lock *locks);
+
+/**@brief   Acquire the filesystem superblock pointer of a mp.
+ *
+ * @param   mount_point Mount point.
+ * @param   sb Superblock handle
+ *
+ * @return Standard error code. */
+int ext4_get_sblock(const char *mount_point, struct ext4_sblock **sb);
+
+/**@brief   Enable/disable write back cache mode.
+ * @warning Default model of cache is write through. It means that when you do:
+ *
+ *          ext4_fopen(...);
+ *          ext4_fwrite(...);
+ *                           < --- data is flushed to physical drive
+ *
+ *          When you do:
+ *          ext4_cache_write_back(..., 1);
+ *          ext4_fopen(...);
+ *          ext4_fwrite(...);
+ *                           < --- data is NOT flushed to physical drive
+ *          ext4_cache_write_back(..., 0);
+ *                           < --- when write back mode is disabled all
+ *                                 cache data will be flushed
+ * To enable write back mode permanently just call this function
+ * once after ext4_mount (and disable before ext4_umount).
+ *
+ * Some of the functions use write back cache mode internally.
+ * If you enable write back mode twice you have to disable it twice
+ * to flush all data:
+ *
+ *      ext4_cache_write_back(..., 1);
+ *      ext4_cache_write_back(..., 1);
+ *
+ *      ext4_cache_write_back(..., 0);
+ *      ext4_cache_write_back(..., 0);
+ *
+ * Write back mode is useful when you want to create a lot of empty
+ * files/directories.
+ *
+ * @param   path Path.
+ * @param   on Enable/disable cache writeback mode.
+ *
+ * @return Standard error code. */
+int ext4_cache_write_back(const char *path, bool on);
+
+
+/**@brief   Force cache flush.
+ *
+ * @param   path Path.
+ *
+ * @return  Standard error code. */
+int ext4_cache_flush(const char *path);
+
+/********************************FILE OPERATIONS*****************************/
+
+/**@brief   Remove file by path.
+ *
+ * @param   path Path to file.
+ *
+ * @return  Standard error code. */
+int ext4_fremove(const char *path);
+
+/**@brief   Create a hardlink for a file.
+ *
+ * @param   path Path to file.
+ * @param   hardlink_path Path of hardlink.
+ *
+ * @return  Standard error code. */
+int ext4_flink(const char *path, const char *hardlink_path);
+
+/**@brief Rename file.
+ * @param path Source.
+ * @param new_path Destination.
+ * @return  Standard error code. */
+int ext4_frename(const char *path, const char *new_path);
+
+/**@brief   File open function.
+ *
+ * @param   file  File handle.
+ * @param   path  File path, has to start from mount point:/my_partition/file.
+ * @param   flags File open flags.
+ *  |---------------------------------------------------------------|
+ *  |   r or rb                 O_RDONLY                            |
+ *  |---------------------------------------------------------------|
+ *  |   w or wb                 O_WRONLY|O_CREAT|O_TRUNC            |
+ *  |---------------------------------------------------------------|
+ *  |   a or ab                 O_WRONLY|O_CREAT|O_APPEND           |
+ *  |---------------------------------------------------------------|
+ *  |   r+ or rb+ or r+b        O_RDWR                              |
+ *  |---------------------------------------------------------------|
+ *  |   w+ or wb+ or w+b        O_RDWR|O_CREAT|O_TRUNC              |
+ *  |---------------------------------------------------------------|
+ *  |   a+ or ab+ or a+b        O_RDWR|O_CREAT|O_APPEND             |
+ *  |---------------------------------------------------------------|
+ *
+ * @return  Standard error code.*/
+int ext4_fopen(ext4_file *file, const char *path, const char *flags);
+
+/**@brief   Alternate file open function.
+ *
+ * @param   file  File handle.
+ * @param   path  File path, has to start from mount point:/my_partition/file.
+ * @param   flags File open flags.
+ *
+ * @return  Standard error code.*/
+int ext4_fopen2(ext4_file *file, const char *path, int flags);
+
+/**@brief   File close function.
+ *
+ * @param   file File handle.
+ *
+ * @return  Standard error code.*/
+int ext4_fclose(ext4_file *file);
+
+
+/**@brief   File truncate function.
+ *
+ * @param   file File handle.
+ * @param   size New file size.
+ *
+ * @return  Standard error code.*/
+int ext4_ftruncate(ext4_file *file, u64int size);
+
+/**@brief   Read data from file.
+ *
+ * @param   file File handle.
+ * @param   buf  Output buffer.
+ * @param   size Bytes to read.
+ * @param   rcnt Bytes read (nil allowed).
+ *
+ * @return  Standard error code.*/
+int ext4_fread(ext4_file *file, void *buf, usize size, usize *rcnt);
+
+/**@brief   Write data to file.
+ *
+ * @param   file File handle.
+ * @param   buf  Data to write
+ * @param   size Write length.
+ * @param   wcnt Bytes written (nil allowed).
+ *
+ * @return  Standard error code.*/
+int ext4_fwrite(ext4_file *file, const void *buf, usize size, usize *wcnt);
+
+/**@brief   File seek operation.
+ *
+ * @param   file File handle.
+ * @param   offset Offset to seek.
+ * @param   origin Seek type:
+ *              @ref SEEK_SET
+ *              @ref SEEK_CUR
+ *              @ref SEEK_END
+ *
+ * @return  Standard error code.*/
+int ext4_fseek(ext4_file *file, s64int offset, u32int origin);
+
+/**@brief   Get file position.
+ *
+ * @param   file File handle.
+ *
+ * @return  Actual file position */
+u64int ext4_ftell(ext4_file *file);
+
+/**@brief   Get file size.
+ *
+ * @param   file File handle.
+ *
+ * @return  File size. */
+u64int ext4_fsize(ext4_file *file);
+
+
+/**@brief Get inode of file/directory/link.
+ *
+ * @param path    Path to file/dir/link.
+ * @param ret_ino Inode number.
+ * @param inode   Inode internals.
+ *
+ * @return  Standard error code.*/
+int ext4_raw_inode_fill(const char *path, u32int *ret_ino,
+			struct ext4_inode *inode);
+
+/**@brief Check if inode exists.
+ *
+ * @param path    Path to file/dir/link.
+ * @param type    Inode type.
+ *                @ref EXT4_DE_UNKNOWN
+ *                @ref EXT4_DE_REG_FILE
+ *                @ref EXT4_DE_DIR
+ *                @ref EXT4_DE_CHRDEV
+ *                @ref EXT4_DE_BLKDEV
+ *                @ref EXT4_DE_FIFO
+ *                @ref EXT4_DE_SOCK
+ *                @ref EXT4_DE_SYMLINK
+ *
+ * @return  Standard error code.*/
+int ext4_inode_exist(const char *path, int type);
+
+/**@brief Change file/directory/link mode bits.
+ *
+ * @param path Path to file/dir/link.
+ * @param mode New mode bits (for example 0777).
+ *
+ * @return  Standard error code.*/
+int ext4_mode_set(const char *path, u32int mode);
+
+
+/**@brief Get file/directory/link mode bits.
+ *
+ * @param path Path to file/dir/link.
+ * @param mode Receives the current mode bits (for example 0777).
+ *
+ * @return  Standard error code.*/
+int ext4_mode_get(const char *path, u32int *mode);
+
+/**@brief Change file owner and group.
+ *
+ * @param path Path to file/dir/link.
+ * @param uid  User id.
+ * @param gid  Group id.
+ *
+ * @return  Standard error code.*/
+int ext4_owner_set(const char *path, u32int uid, u32int gid);
+
+/**@brief Get file/directory/link owner and group.
+ *
+ * @param path Path to file/dir/link.
+ * @param uid  User id.
+ * @param gid  Group id.
+ *
+ * @return  Standard error code.*/
+int ext4_owner_get(const char *path, u32int *uid, u32int *gid);
+
+/**@brief Set file/directory/link access time.
+ *
+ * @param path  Path to file/dir/link.
+ * @param atime Access timestamp.
+ *
+ * @return  Standard error code.*/
+int ext4_atime_set(const char *path, u32int atime);
+
+/**@brief Set file/directory/link modify time.
+ *
+ * @param path  Path to file/dir/link.
+ * @param mtime Modify timestamp.
+ *
+ * @return  Standard error code.*/
+int ext4_mtime_set(const char *path, u32int mtime);
+
+/**@brief Set file/directory/link change time.
+ *
+ * @param path  Path to file/dir/link.
+ * @param ctime Change timestamp.
+ *
+ * @return  Standard error code.*/
+int ext4_ctime_set(const char *path, u32int ctime);
+
+/**@brief Get file/directory/link access time.
+ *
+ * @param path  Path to file/dir/link.
+ * @param atime Access timestamp.
+ *
+ * @return  Standard error code.*/
+int ext4_atime_get(const char *path, u32int *atime);
+
+/**@brief Get file/directory/link modify time.
+ *
+ * @param path  Path to file/dir/link.
+ * @param mtime Modify timestamp.
+ *
+ * @return  Standard error code.*/
+int ext4_mtime_get(const char *path, u32int *mtime);
+
+/**@brief Get file/directory/link change time.
+ *
+ * @param path  Path to file/dir/link.
+ * @param ctime Change timestamp.
+ *
+ * @return  standard error code*/
+int ext4_ctime_get(const char *path, u32int *ctime);
+
+/**@brief Create symbolic link.
+ *
+ * @param target Destination entry path.
+ * @param path   Source entry path.
+ *
+ * @return  Standard error code.*/
+int ext4_fsymlink(const char *target, const char *path);
+
+/**@brief Create special file.
+ * @param path     Path to new special file.
+ * @param filetype Filetype of the new special file.
+ * 	           (that must not be regular file, directory, or unknown type)
+ * @param dev      If filetype is char device or block device,
+ * 	           the device number will become the payload in the inode.
+ * @return  Standard error code.*/
+int ext4_mknod(const char *path, int filetype, u32int dev);
+
+/**@brief Read symbolic link payload.
+ *
+ * @param path    Path to symlink.
+ * @param buf     Output buffer.
+ * @param bufsize Output buffer max size.
+ * @param rcnt    Bytes read.
+ *
+ * @return  Standard error code.*/
+int ext4_readlink(const char *path, char *buf, usize bufsize, usize *rcnt);
+
+/*********************************DIRECTORY OPERATION***********************/
+
+/**@brief   Recursive directory remove.
+ *
+ * @param   path Directory path to remove
+ *
+ * @return  Standard error code.*/
+int ext4_dir_rm(const char *path);
+
+/**@brief Rename/move directory.
+ *
+ * @param path     Source path.
+ * @param new_path Destination path.
+ *
+ * @return  Standard error code. */
+int ext4_dir_mv(const char *path, const char *new_path);
+
+/**@brief   Create new directory.
+ *
+ * @param   path Directory name.
+ *
+ * @return  Standard error code.*/
+int ext4_dir_mk(const char *path);
+
+/**@brief   Directory open.
+ *
+ * @param   dir  Directory handle.
+ * @param   path Directory path.
+ *
+ * @return  Standard error code.*/
+int ext4_dir_open(ext4_dir *dir, const char *path);
+
+/**@brief   Directory close.
+ *
+ * @param   dir directory handle.
+ *
+ * @return  Standard error code.*/
+int ext4_dir_close(ext4_dir *dir);
+
+/**@brief   Return next directory entry.
+ *
+ * @param   dir Directory handle.
+ *
+ * @return  Directory entry id (nil if no entry)*/
+const ext4_direntry *ext4_dir_entry_next(ext4_dir *dir);
+
+/**@brief   Rewind directory entry offset.
+ *
+ * @param   dir Directory handle.*/
+void ext4_dir_entry_rewind(ext4_dir *dir);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_balloc.h
@@ -1,0 +1,62 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_fs.h"
+
+/**@brief Compute number of block group from block address.
+ * @param s superblock pointer.
+ * @param baddr Absolute address of block.
+ * @return Block group index
+ */
+u32int ext4_balloc_get_bgid_of_block(struct ext4_sblock *s,
+				       ext4_fsblk_t baddr);
+
+/**@brief Compute the starting block address of a block group
+ * @param s    superblock pointer.
+ * @param bgid block group index
+ * @return Block address
+ */
+ext4_fsblk_t ext4_balloc_get_block_of_bgid(struct ext4_sblock *s,
+					   u32int bgid);
+
+/**@brief Calculate and set checksum of block bitmap.
+ * @param sb superblock pointer.
+ * @param bg block group
+ * @param bitmap bitmap buffer
+ */
+void ext4_balloc_set_bitmap_csum(struct ext4_sblock *sb,
+				 struct ext4_bgroup *bg,
+				 void *bitmap);
+
+/**@brief   Free block from inode.
+ * @param   inode_ref inode reference
+ * @param   baddr block address
+ * @return  standard error code*/
+int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref,
+			   ext4_fsblk_t baddr);
+
+/**@brief   Free blocks from inode.
+ * @param   inode_ref inode reference
+ * @param   first block address
+ * @param   count block count
+ * @return  standard error code*/
+int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
+			    ext4_fsblk_t first, u32int count);
+
+/**@brief   Allocate block procedure.
+ * @param   inode_ref inode reference
+ * @param   goal
+ * @param   baddr allocated block address
+ * @return  standard error code*/
+int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref,
+			    ext4_fsblk_t goal,
+			    ext4_fsblk_t *baddr);
+
+/**@brief   Try allocate selected block.
+ * @param   inode_ref inode reference
+ * @param   baddr block address to allocate
+ * @param   free if baddr is not allocated
+ * @return  standard error code*/
+int ext4_balloc_try_alloc_block(struct ext4_inode_ref *inode_ref,
+				ext4_fsblk_t baddr, bool *free);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_bcache.h
@@ -1,0 +1,240 @@
+#pragma once
+
+#include "tree.h"
+#include "queue.h"
+
+#define EXT4_BLOCK_ZERO() 	\
+	{0}
+
+/**@brief   Single block descriptor*/
+struct ext4_buf {
+	/**@brief   Flags*/
+	int flags;
+
+	/**@brief   Logical block address*/
+	u64int lba;
+
+	/**@brief   Data buffer.*/
+	u8int *data;
+
+	/**@brief   LRU priority. (unused) */
+	u32int lru_prio;
+
+	/**@brief   LRU id.*/
+	u32int lru_id;
+
+	/**@brief   Reference count table*/
+	u32int refctr;
+
+	/**@brief   The block cache this buffer belongs to. */
+	struct ext4_bcache *bc;
+
+	/**@brief   Whether or not buffer is on dirty list.*/
+	bool on_dirty_list;
+
+	/**@brief   LBA tree node*/
+	RB_ENTRY(ext4_buf) lba_node;
+
+	/**@brief   LRU tree node*/
+	RB_ENTRY(ext4_buf) lru_node;
+
+	/**@brief   Dirty list node*/
+	SLIST_ENTRY(ext4_buf) dirty_node;
+
+	/**@brief   Callback routine after a disk-write operation.
+	 * @param   bc block cache descriptor
+	 * @param   buf buffer descriptor
+	 * @param   standard error code returned by bdev->bwrite()
+	 * @param   arg argument passed to this routine*/
+	void (*end_write)(struct ext4_bcache *bc,
+			  struct ext4_buf *buf,
+			  int res,
+			  void *arg);
+
+	/**@brief   argument passed to end_write() callback.*/
+	void *end_write_arg;
+};
+
+/**@brief   Single block descriptor*/
+struct ext4_block {
+	/**@brief   Logical block ID*/
+	u64int lb_id;
+
+	/**@brief   Buffer */
+	struct ext4_buf *buf;
+
+	/**@brief   Data buffer.*/
+	u8int *data;
+};
+
+/**@brief   Block cache descriptor*/
+struct ext4_bcache {
+
+	/**@brief   Item count in block cache*/
+	u32int cnt;
+
+	/**@brief   Item size in block cache*/
+	u32int itemsize;
+
+	/**@brief   Last recently used counter*/
+	u32int lru_ctr;
+
+	/**@brief   Currently referenced datablocks*/
+	u32int ref_blocks;
+
+	/**@brief   Maximum referenced datablocks*/
+	u32int max_ref_blocks;
+
+	/**@brief   The blockdev bound to this block cache*/
+	struct ext4_blockdev *bdev;
+
+	/**@brief   The cache should not be shaken */
+	bool dont_shake;
+
+	/**@brief   A tree holding all bufs*/
+	RB_HEAD(ext4_buf_lba, ext4_buf) lba_root;
+
+	/**@brief   A tree holding unreferenced bufs*/
+	RB_HEAD(ext4_buf_lru, ext4_buf) lru_root;
+
+	/**@brief   A singly-linked list holding dirty buffers*/
+	SLIST_HEAD(ext4_buf_dirty, ext4_buf) dirty_list;
+};
+
+/**@brief buffer state bits
+ *
+ *  - BC_UPTODATE: Buffer contains valid data.
+ *  - BC_DIRTY: Buffer is dirty.
+ *  - BC_FLUSH: Buffer will be immediately flushed,
+ *              when no one references it.
+ *  - BC_TMP: Buffer will be dropped once its refctr
+ *            reaches zero.
+ */
+enum bcache_state_bits {
+	BC_UPTODATE,
+	BC_DIRTY,
+	BC_FLUSH,
+	BC_TMP
+};
+/* Flag helpers: b is a bcache_state_bits value used as a bit index into buf->flags. */
+#define ext4_bcache_set_flag(buf, b)    \
+	((buf)->flags |= 1 << (b))
+/* Each expansion is fully parenthesized so it stays one operand inside a larger expression. */
+#define ext4_bcache_clear_flag(buf, b)    \
+	((buf)->flags &= ~(1 << (b)))
+/* Evaluates to 1 when bit b is set in buf->flags, otherwise 0. */
+#define ext4_bcache_test_flag(buf, b)    \
+	(((buf)->flags & (1 << (b))) >> (b))
+
+static inline void ext4_bcache_set_dirty(struct ext4_buf *buf) {
+	ext4_bcache_set_flag(buf, BC_DIRTY);	/* mark the buffer dirty ... */
+	ext4_bcache_set_flag(buf, BC_UPTODATE);	/* ... and flag its data as valid */
+}
+
+static inline void ext4_bcache_clear_dirty(struct ext4_buf *buf) {
+	ext4_bcache_clear_flag(buf, BC_DIRTY);
+	ext4_bcache_clear_flag(buf, BC_UPTODATE);	/* note: the valid-data bit is dropped as well */
+}
+
+/**@brief   Post-increment buf's reference counter; evaluates to the old count.*/
+#define ext4_bcache_inc_ref(buf) ((buf)->refctr++)
+
+/**@brief   Post-decrement buf's reference counter; evaluates to the old count.*/
+#define ext4_bcache_dec_ref(buf) ((buf)->refctr--)
+
+/**@brief   Put a buffer at the head of the dirty list; no-op when already on it.
+ * @param   bc block cache descriptor owning the dirty list
+ * @param   buf buffer descriptor */
+static inline void
+ext4_bcache_insert_dirty_node(struct ext4_bcache *bc, struct ext4_buf *buf) {
+	if (buf->on_dirty_list)
+		return;	/* already linked; never insert the same node twice */
+	SLIST_INSERT_HEAD(&bc->dirty_list, buf, dirty_node);
+	buf->on_dirty_list = true;
+}
+
+/**@brief   Take a buffer off the dirty list; no-op when it is not on it.
+ * @param   bc block cache descriptor owning the dirty list
+ * @param   buf buffer descriptor */
+static inline void
+ext4_bcache_remove_dirty_node(struct ext4_bcache *bc, struct ext4_buf *buf) {
+	if (!buf->on_dirty_list)
+		return;	/* nothing to unlink */
+	SLIST_REMOVE(&bc->dirty_list, buf, ext4_buf, dirty_node);
+	buf->on_dirty_list = false;
+}
+
+
+/**@brief   Dynamic initialization of block cache.
+ * @param   bc block cache descriptor
+ * @param   cnt items count in block cache
+ * @param   itemsize single item size (in bytes)
+ * @return  standard error code*/
+int ext4_bcache_init_dynamic(struct ext4_bcache *bc, u32int cnt,
+			     u32int itemsize);
+
+/**@brief   Do cleanup works on block cache.
+ * @param   bc block cache descriptor.*/
+void ext4_bcache_cleanup(struct ext4_bcache *bc);
+
+/**@brief   Dynamic de-initialization of block cache.
+ * @param   bc block cache descriptor
+ * @return  standard error code*/
+int ext4_bcache_fini_dynamic(struct ext4_bcache *bc);
+
+/**@brief   Get a buffer with the lowest LRU counter in bcache.
+ * @param   bc block cache descriptor
+ * @return  buffer with the lowest LRU counter*/
+struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc);
+
+/**@brief   Drop unreferenced buffer from bcache.
+ * @param   bc block cache descriptor
+ * @param   buf buffer*/
+void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf);
+
+/**@brief   Invalidate a buffer.
+ * @param   bc block cache descriptor
+ * @param   buf buffer*/
+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
+				struct ext4_buf *buf);
+
+/**@brief   Invalidate a range of buffers.
+ * @param   bc block cache descriptor
+ * @param   from starting lba
+ * @param   cnt block counts
+ * @param   buf buffer*/
+void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,
+				u64int from,
+				u32int cnt);
+
+/**@brief   Find existing buffer from block cache memory.
+ *          Unreferenced block allocation is based on LRU
+ *          (Least Recently Used) algorithm.
+ * @param   bc block cache descriptor
+ * @param   b block to alloc
+ * @param   lba logical block address
+ * @return  block cache buffer */
+struct ext4_buf *
+ext4_bcache_find_get(struct ext4_bcache *bc, struct ext4_block *b,
+		     u64int lba);
+
+/**@brief   Allocate block from block cache memory.
+ *          Unreferenced block allocation is based on LRU
+ *          (Least Recently Used) algorithm.
+ * @param   bc block cache descriptor
+ * @param   b block to alloc
+ * @param   is_new block is new (needs to be read)
+ * @return  standard error code*/
+int ext4_bcache_alloc(struct ext4_bcache *bc, struct ext4_block *b,
+		      bool *is_new);
+
+/**@brief   Free block from cache memory (decrement reference counter).
+ * @param   bc block cache descriptor
+ * @param   b block to free
+ * @return  standard error code*/
+int ext4_bcache_free(struct ext4_bcache *bc, struct ext4_block *b);
+
+/**@brief   Return a full status of block cache.
+ * @param   bc block cache descriptor
+ * @return  full status*/
+bool ext4_bcache_is_full(struct ext4_bcache *bc);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_bitmap.h
@@ -1,0 +1,49 @@
+#pragma once
+
+#include "ext4_config.h"
+
+/**@brief   Turn one bit of a bitmap on.
+ * @param   bmap bitmap buffer
+ * @param   bit bit index: byte bit>>3, mask 1<<(bit&7)*/
+static inline void ext4_bmap_bit_set(u8int *bmap, u32int bit)
+{
+	bmap[bit >> 3] |= 1 << (bit & 7);
+}
+
+/**@brief   Turn one bit of a bitmap off.
+ * @param   bmap bitmap buffer
+ * @param   bit bit index: byte bit>>3, mask 1<<(bit&7)*/
+static inline void ext4_bmap_bit_clr(u8int *bmap, u32int bit)
+{
+	bmap[bit >> 3] &= ~(1 << (bit & 7));
+}
+
+/**@brief   Test whether one bit of a bitmap is on.
+ * @param   bmap bitmap buffer
+ * @param   bit bit index: byte bit>>3, mask 1<<(bit&7)*/
+static inline bool ext4_bmap_is_bit_set(u8int *bmap, u32int bit)
+{
+	return bmap[bit >> 3] & (1 << (bit & 7));
+}
+
+/**@brief   Test whether one bit of a bitmap is off.
+ * @param   bmap bitmap buffer
+ * @param   bit bit index: byte bit>>3, mask 1<<(bit&7)*/
+static inline bool ext4_bmap_is_bit_clr(u8int *bmap, u32int bit)
+{
+	return (bmap[bit >> 3] & (1 << (bit & 7))) == 0;
+}
+
+/**@brief   Free range of bits in bitmap.
+ * @param   bmap bitmap buffer
+ * @param   sbit start bit
+ * @param   bcnt bit count*/
+void ext4_bmap_bits_free(u8int *bmap, u32int sbit, u32int bcnt);
+
+/**@brief   Find first clear bit in bitmap.
+ * @param   sbit start bit of search
+ * @param   ebit end bit of search
+ * @param   bit_id output parameter (first free bit)
+ * @return  standard error code*/
+int ext4_bmap_bit_find_clr(u8int *bmap, u32int sbit, u32int ebit,
+			   u32int *bit_id, bool *no_space);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_block_group.h
@@ -1,0 +1,271 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_super.h"
+
+/**@brief Read the block bitmap's block address from a group descriptor.
+ * @param bg pointer to block group
+ * @param s pointer to superblock (its descriptor size gates the high half)
+ * @return block_bitmap_lo, with block_bitmap_hi as bits 32-63 on large descriptors
+ */
+static inline u64int ext4_bg_get_block_bitmap(struct ext4_bgroup *bg,
+						struct ext4_sblock *s)
+{
+	u64int lo = to_le32(bg->block_bitmap_lo);
+	u64int hi = 0;
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		hi = to_le32(bg->block_bitmap_hi);
+
+	return lo | (hi << 32);
+}
+
+/**@brief Store the block bitmap's block address in a group descriptor.
+ * @param bg pointer to block group
+ * @param s pointer to superblock
+ * @param blk block address to store
+ * @note block_bitmap_hi is written only when the descriptor size exceeds the minimum
+ */
+static inline void ext4_bg_set_block_bitmap(struct ext4_bgroup *bg,
+					    struct ext4_sblock *s, u64int blk)
+{
+	/* upper 32 bits exist only in descriptors larger than the minimum size */
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		bg->block_bitmap_hi = to_le32(blk >> 32);
+	/* lower 32 bits are always stored */
+	bg->block_bitmap_lo = to_le32((u32int)blk);
+}
+
+/**@brief Read the i-node bitmap's block address from a group descriptor.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock (its descriptor size gates the high half)
+ * @return inode_bitmap_lo, with inode_bitmap_hi as bits 32-63 on large descriptors
+ */
+static inline u64int ext4_bg_get_inode_bitmap(struct ext4_bgroup *bg,
+						struct ext4_sblock *s)
+{
+	u64int lo = to_le32(bg->inode_bitmap_lo);
+	u64int hi = 0;
+
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		hi = to_le32(bg->inode_bitmap_hi);
+
+	return lo | (hi << 32);
+}
+
+/**@brief Store the i-node bitmap's block address in a group descriptor.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param blk block address to store
+ * @note inode_bitmap_hi is written only when the descriptor size exceeds the minimum
+ */
+static inline void ext4_bg_set_inode_bitmap(struct ext4_bgroup *bg,
+					    struct ext4_sblock *s, u64int blk)
+{
+	/* upper 32 bits exist only in descriptors larger than the minimum size */
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		bg->inode_bitmap_hi = to_le32(blk >> 32);
+	bg->inode_bitmap_lo = to_le32((u32int)blk);
+}
+
+
+/**@brief Read the address of the i-node table's first block from a group descriptor.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock (its descriptor size gates the high half)
+ * @return inode_table_first_block_lo, with the _hi field as bits 32-63 on large descriptors
+ */
+static inline u64int
+ext4_bg_get_inode_table_first_block(struct ext4_bgroup *bg,
+				    struct ext4_sblock *s)
+{
+	u64int lo = to_le32(bg->inode_table_first_block_lo);
+	u64int hi = 0;
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		hi = to_le32(bg->inode_table_first_block_hi);
+
+	return lo | (hi << 32);
+}
+
+/**@brief Store the address of the i-node table's first block in a group descriptor.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param blk block address to store
+ * @note the _hi field is written only when the descriptor size exceeds the minimum
+ */
+static inline void
+ext4_bg_set_inode_table_first_block(struct ext4_bgroup *bg,
+				    struct ext4_sblock *s, u64int blk)
+{
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		bg->inode_table_first_block_hi = to_le32(blk >> 32);
+	bg->inode_table_first_block_lo = to_le32((u32int)blk);
+}
+
+/**@brief Read the free block count of a block group.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock (its descriptor size gates the high half)
+ * @return free_blocks_count_lo, with the _hi field as bits 16-31 on large descriptors
+ */
+static inline u32int ext4_bg_get_free_blocks_count(struct ext4_bgroup *bg,
+						     struct ext4_sblock *s)
+{
+	u32int lo = to_le16(bg->free_blocks_count_lo);
+	u32int hi = 0;
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		hi = to_le16(bg->free_blocks_count_hi);
+
+	return lo | (hi << 16);
+}
+
+/**@brief Store the free block count of a block group.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Count to store; bits 16-31 are kept only on descriptors above the minimum size
+ */
+static inline void ext4_bg_set_free_blocks_count(struct ext4_bgroup *bg,
+						 struct ext4_sblock *s,
+						 u32int cnt)
+{
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		bg->free_blocks_count_hi = to_le16(cnt >> 16);
+	bg->free_blocks_count_lo = to_le16(cnt & 0xffff);
+}
+
+/**@brief Read the free i-node count of a block group.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock (its descriptor size gates the high half)
+ * @return free_inodes_count_lo, with the _hi field as bits 16-31 on large descriptors
+ */
+static inline u32int ext4_bg_get_free_inodes_count(struct ext4_bgroup *bg,
+						     struct ext4_sblock *s)
+{
+	u32int lo = to_le16(bg->free_inodes_count_lo);
+	u32int hi = 0;
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		hi = to_le16(bg->free_inodes_count_hi);
+
+	return lo | (hi << 16);
+}
+
+/**@brief Store the free i-node count of a block group.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Count to store; bits 16-31 are kept only on descriptors above the minimum size
+ */
+static inline void ext4_bg_set_free_inodes_count(struct ext4_bgroup *bg,
+						 struct ext4_sblock *s,
+						 u32int cnt)
+{
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		bg->free_inodes_count_hi = to_le16(cnt >> 16);
+	bg->free_inodes_count_lo = to_le16(cnt & 0xffff);
+}
+
+/**@brief Read the used-directory count of a block group.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock (its descriptor size gates the high half)
+ * @return used_dirs_count_lo, with the _hi field as bits 16-31 on large descriptors
+ */
+static inline u32int ext4_bg_get_used_dirs_count(struct ext4_bgroup *bg,
+						   struct ext4_sblock *s)
+{
+	u32int lo = to_le16(bg->used_dirs_count_lo);
+	u32int hi = 0;
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		hi = to_le16(bg->used_dirs_count_hi);
+
+	return lo | (hi << 16);
+}
+
+/**@brief Store the used-directory count of a block group.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Count to store; bits 16-31 are kept only on descriptors above the minimum size
+ */
+static inline void ext4_bg_set_used_dirs_count(struct ext4_bgroup *bg,
+					       struct ext4_sblock *s,
+					       u32int cnt)
+{
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		bg->used_dirs_count_hi = to_le16(cnt >> 16);
+	bg->used_dirs_count_lo = to_le16(cnt & 0xffff);
+}
+
+/**@brief Read the unused i-node count of a block group.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock (its descriptor size gates the high half)
+ * @return itable_unused_lo, with the _hi field as bits 16-31 on large descriptors
+ */
+static inline u32int ext4_bg_get_itable_unused(struct ext4_bgroup *bg,
+						 struct ext4_sblock *s)
+{
+	u32int lo = to_le16(bg->itable_unused_lo);
+	u32int hi = 0;
+
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		hi = to_le16(bg->itable_unused_hi);
+
+	return lo | (hi << 16);
+}
+
+/**@brief Store the unused i-node count of a block group.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Count to store; bits 16-31 are kept only on descriptors above the minimum size
+ */
+static inline void ext4_bg_set_itable_unused(struct ext4_bgroup *bg,
+					     struct ext4_sblock *s,
+					     u32int cnt)
+{
+	if (ext4_sb_get_desc_size(s) > EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		bg->itable_unused_hi = to_le16(cnt >> 16);
+	bg->itable_unused_lo = to_le16(cnt & 0xffff);
+}
+
+/**@brief  Set checksum of block group.
+ * @param bg Pointer to block group
+ * @param crc Checksum of block group; stored through to_le16()
+ */
+static inline void ext4_bg_set_checksum(struct ext4_bgroup *bg, u16int crc)
+{
+	bg->checksum = to_le16(crc);
+}
+
+/**@brief Check if block group has a flag.
+ * @param bg Pointer to block group
+ * @param f Flag mask to be checked
+ * @return Nonzero (true) when any bit of f is set in the group's flags
+ */
+static inline bool ext4_bg_has_flag(struct ext4_bgroup *bg, u32int f)
+{
+	return to_le16(bg->flags) & f;
+}
+
+/**@brief Turn on flag bits of a block group.
+ * @param bg Pointer to block group
+ * @param f Flag mask to be set (OR-ed into the 16-bit flags field)
+ */
+static inline void ext4_bg_set_flag(struct ext4_bgroup *bg, u32int f)
+{
+	u16int cur = to_le16(bg->flags);
+	u16int upd = cur | f;
+	bg->flags = to_le16(upd);
+}
+
+/**@brief Turn off flag bits of a block group.
+ * @param bg Pointer to block group
+ * @param f Flag mask to be cleared (its complement is AND-ed into the 16-bit flags field)
+ */
+static inline void ext4_bg_clear_flag(struct ext4_bgroup *bg, u32int f)
+{
+	u16int cur = to_le16(bg->flags);
+	u16int upd = cur & ~f;
+	bg->flags = to_le16(upd);
+}
+
+/**@brief Calculate CRC16 of the block group.
+ * @param crc Init value
+ * @param buffer Input buffer
+ * @param len Sizeof input buffer
+ * @return Computed CRC16*/
+u16int ext4_bg_crc16(u16int crc, const u8int *buffer, usize len);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_blockdev.h
@@ -1,0 +1,215 @@
+#pragma once
+
+#include "ext4_bcache.h"
+
+struct ext4_blockdev_iface {
+	/**@brief   Open device function
+	 * @param   bdev block device.*/
+	int (*open)(struct ext4_blockdev *bdev);
+
+	/**@brief   Block read function.
+	 * @param   bdev block device
+	 * @param   buf output buffer
+	 * @param   blk_id block id
+	 * @param   blk_cnt block count*/
+	int (*bread)(struct ext4_blockdev *bdev, void *buf, u64int blk_id,
+		     u32int blk_cnt);
+
+	/**@brief   Block write function for block device bdev.
+	 * @param   buf input buffer
+	 * @param   blk_id block id
+	 * @param   blk_cnt block count*/
+	int (*bwrite)(struct ext4_blockdev *bdev, const void *buf,
+		      u64int blk_id, u32int blk_cnt);
+
+	/**@brief   Close device function.
+	 * @param   bdev block device.*/
+	int (*close)(struct ext4_blockdev *bdev);
+
+	/**@brief   Lock block device. Required in multi partition mode
+	 *          operations. Not mandatory field.
+	 * @param   bdev block device.*/
+	int (*lock)(struct ext4_blockdev *bdev);
+
+	/**@brief   Unlock block device. Required in multi partition mode
+	 *          operations. Not mandatory field.
+	 * @param   bdev block device.*/
+	int (*unlock)(struct ext4_blockdev *bdev);
+
+	/**@brief   Block size (bytes): physical*/
+	u32int ph_bsize;
+
+	/**@brief   Block count: physical*/
+	u64int ph_bcnt;
+
+	/**@brief   Physical block buffer (EXT4_BLOCKDEV_STATIC_INSTANCE sizes it to ph_bsize bytes)*/
+	u8int *ph_bbuf;
+
+	/**@brief   Reference counter to block device interface*/
+	u32int ph_refctr;
+
+	/**@brief   Physical read counter*/
+	u32int bread_ctr;
+
+	/**@brief   Physical write counter*/
+	u32int bwrite_ctr;
+
+	/**@brief   User data pointer*/
+	void* p_user;
+};
+
+/**@brief   Definition of the simple block device.*/
+struct ext4_blockdev {
+	/**@brief Block device interface*/
+	struct ext4_blockdev_iface *bdif;
+
+	/**@brief Offset in bdif. For multi partition mode.*/
+	u64int part_offset;
+
+	/**@brief Part size in bdif. For multi partition mode.*/
+	u64int part_size;
+
+	/**@brief   Block cache.*/
+	struct ext4_bcache *bc;
+
+	/**@brief   Block size (bytes) logical*/
+	u32int lg_bsize;
+
+	/**@brief   Block count: logical*/
+	u64int lg_bcnt;
+
+	/**@brief   Cache write back mode reference counter*/
+	u32int cache_write_back;
+
+	/**@brief   The filesystem this block device belongs to. */
+	struct ext4_fs *fs;
+
+	void *journal;	/* NOTE(review): untyped handle; presumably the journal bound to this device - confirm */
+};
+
+#pragma incomplete struct ext4_blockdev
+
+/**@brief   Static initialization of the block device; part_size is computed in 64 bits so __bcnt * __bsize cannot overflow int.*/
+#define EXT4_BLOCKDEV_STATIC_INSTANCE(__name, __bsize, __bcnt, __open, __bread,\
+				      __bwrite, __close, __lock, __unlock)     \
+	static u8int __name##_ph_bbuf[(__bsize)];                            \
+	static struct ext4_blockdev_iface __name##_iface = {                   \
+		.open = __open,                                                \
+		.bread = __bread,                                              \
+		.bwrite = __bwrite,                                            \
+		.close = __close,                                              \
+		.lock = __lock,                                                \
+		.unlock = __unlock,                                            \
+		.ph_bsize = (__bsize),                                         \
+		.ph_bcnt = (__bcnt),                                           \
+		.ph_bbuf = __name##_ph_bbuf,                                   \
+	};								       \
+	static struct ext4_blockdev __name = {                                 \
+		.bdif = &__name##_iface,                                       \
+		.part_offset = 0,                                              \
+		.part_size = (u64int)(__bcnt) * (__bsize),                     \
+	}
+
+/**@brief   Block device initialization.
+ * @param   bdev block device descriptor
+ * @return  standard error code*/
+int ext4_block_init(struct ext4_blockdev *bdev);
+
+/**@brief   Binds a bcache to block device.
+ * @param   bdev block device descriptor
+ * @param   bc block cache descriptor
+ * @return  standard error code*/
+int ext4_block_bind_bcache(struct ext4_blockdev *bdev, struct ext4_bcache *bc);
+
+/**@brief   Close block device
+ * @param   bdev block device descriptor
+ * @return  standard error code*/
+int ext4_block_fini(struct ext4_blockdev *bdev);
+
+/**@brief   Flush data in given buffer to disk.
+ * @param   bdev block device descriptor
+ * @param   buf buffer
+ * @return  standard error code*/
+int ext4_block_flush_buf(struct ext4_blockdev *bdev, struct ext4_buf *buf);
+
+/**@brief   Flush data in buffer of given lba to disk,
+ *          if that buffer exists in block cache.
+ * @param   bdev block device descriptor
+ * @param   lba logical block address
+ * @return  standard error code*/
+int ext4_block_flush_lba(struct ext4_blockdev *bdev, u64int lba);
+
+/**@brief   Set logical block size in block device.
+ * @param   bdev block device descriptor
+ * @param   lb_bsize logical block size (in bytes)
+ * @note    declared void: no error code is returned*/
+void ext4_block_set_lb_size(struct ext4_blockdev *bdev, u32int lb_bsize);
+
+/**@brief   Block get function (through cache, don't read).
+ * @param   bdev block device descriptor
+ * @param   b block descriptor
+ * @param   lba logical block address
+ * @return  standard error code*/
+int ext4_block_get_noread(struct ext4_blockdev *bdev, struct ext4_block *b,
+			  u64int lba);
+
+/**@brief   Block get function (through cache).
+ * @param   bdev block device descriptor
+ * @param   b block descriptor
+ * @param   lba logical block address
+ * @return  standard error code*/
+int ext4_block_get(struct ext4_blockdev *bdev, struct ext4_block *b,
+		   u64int lba);
+
+/**@brief   Block set procedure (through cache).
+ * @param   bdev block device descriptor
+ * @param   b block descriptor
+ * @return  standard error code*/
+int ext4_block_set(struct ext4_blockdev *bdev, struct ext4_block *b);
+
+/**@brief   Block read procedure (without cache)
+ * @param   bdev block device descriptor
+ * @param   buf output buffer
+ * @param   lba logical block address
+ * @return  standard error code*/
+int ext4_blocks_get_direct(struct ext4_blockdev *bdev, void *buf, u64int lba,
+			   u32int cnt);
+
+/**@brief   Block write procedure (without cache)
+ * @param   bdev block device descriptor
+ * @param   buf output buffer
+ * @param   lba logical block address
+ * @return  standard error code*/
+int ext4_blocks_set_direct(struct ext4_blockdev *bdev, const void *buf,
+			   u64int lba, u32int cnt);
+
+/**@brief   Write to block device (by direct address).
+ * @param   bdev block device descriptor
+ * @param   off byte offset in block device
+ * @param   buf input buffer
+ * @param   len length of the write buffer
+ * @return  standard error code*/
+int ext4_block_writebytes(struct ext4_blockdev *bdev, u64int off,
+			  const void *buf, u32int len);
+
+/**@brief   Read from block device (by direct address).
+ * @param   bdev block device descriptor
+ * @param   off byte offset in block device
+ * @param   buf output buffer
+ * @param   len length of the read buffer
+ * @return  standard error code*/
+int ext4_block_readbytes(struct ext4_blockdev *bdev, u64int off, void *buf,
+			 u32int len);
+
+/**@brief   Flush all dirty buffers to disk
+ * @param   bdev block device descriptor
+ * @return  standard error code*/
+int ext4_block_cache_flush(struct ext4_blockdev *bdev);
+
+/**@brief   Enable/disable write back cache mode
+ * @param   bdev block device descriptor
+ * @param   on_off
+ *              !0 - ENABLE
+ *               0 - DISABLE (all delayed cache buffers will be flushed)
+ * @return  standard error code*/
+int ext4_block_cache_write_back(struct ext4_blockdev *bdev, u8int on_off);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_config.h
@@ -1,0 +1,40 @@
+#pragma once
+
+#include <u.h>
+#include <libc.h>
+
+typedef enum { false, true } bool;
+
+enum {	/* open(2)-style mode/flag names; values are written in octal */
+	O_RDONLY = 00,
+	O_WRONLY = 01,
+	O_RDWR = 02,
+	O_CREAT = 0100,
+	O_EXCL = 0200,
+	O_TRUNC = 01000,
+	O_APPEND = 02000,
+};
+
+#if defined(__mips__) || defined(__power__) || defined(__power64__) || defined(__sparc__) || defined(__sparc64__)
+#define CONFIG_BIG_ENDIAN
+#endif
+
+#define CONFIG_EXT4_MAX_BLOCKDEV_NAME 128
+#define CONFIG_EXT4_MAX_MP_NAME 128
+#define CONFIG_EXT4_BLOCKDEVS_COUNT 32
+#define CONFIG_EXT4_MOUNTPOINTS_COUNT 32
+#define CONFIG_BLOCK_DEV_CACHE_SIZE 1024
+
+/* Maximum single truncate size. Transactions must be limited to reduce
+ * number of allocations for single transaction
+ */
+#define CONFIG_MAX_TRUNCATE_SIZE (16ul * 1024ul * 1024ul)
+
+extern char Eexists[];
+extern char Einval[];
+extern char Eio[];
+extern char Enomem[];
+extern char Enospc[];
+extern char Enotfound[];
+extern char Eperm[];
+extern char Erdonlyfs[];
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_crc32.h
@@ -1,0 +1,18 @@
+/* Based on FreeBSD. */
+#pragma once
+
+#include "ext4_config.h"
+
+/**@brief	CRC32 algorithm.
+ * @param	crc input feed
+ * @param 	buf input buffer
+ * @param	size input buffer length (bytes)
+ * @return	updated crc32 value*/
+u32int ext4_crc32(u32int crc, const void *buf, u32int size);
+
+/**@brief	CRC32C algorithm.
+ * @param	crc input feed
+ * @param 	buf input buffer
+ * @param	size input buffer length (bytes)
+ * @return	updated crc32c value*/
+u32int ext4_crc32c(u32int crc, const void *buf, u32int size);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_debug.h
@@ -1,0 +1,93 @@
+#pragma once
+
+#include "ext4_config.h"
+
+#define DEBUG_BALLOC (1ul << 0)
+#define DEBUG_BCACHE (1ul << 1)
+#define DEBUG_BITMAP (1ul << 2)
+#define DEBUG_BLOCK_GROUP (1ul << 3)
+#define DEBUG_BLOCKDEV (1ul << 4)
+#define DEBUG_DIR_IDX (1ul << 5)
+#define DEBUG_DIR (1ul << 6)
+#define DEBUG_EXTENT (1ul << 7)
+#define DEBUG_FS (1ul << 8)
+#define DEBUG_HASH (1ul << 9)
+#define DEBUG_IALLOC (1ul << 10)
+#define DEBUG_INODE (1ul << 11)
+#define DEBUG_SUPER (1ul << 12)
+#define DEBUG_XATTR (1ul << 13)
+#define DEBUG_MKFS (1ul << 14)
+#define DEBUG_EXT4 (1ul << 15)
+#define DEBUG_JBD (1ul << 16)
+#define DEBUG_MBR (1ul << 17)
+
+#define DEBUG_NOPREFIX (1ul << 31)
+#define DEBUG_ALL (0xFFFFFFFF)
+
+static inline const char *ext4_dmask_id2str(u32int m)
+{
+	switch(m) {	/* exact match only: m must be a single DEBUG_* module bit */
+	case DEBUG_BALLOC:
+		return "ext4_balloc: ";
+	case DEBUG_BCACHE:
+		return "ext4_bcache: ";
+	case DEBUG_BITMAP:
+		return "ext4_bitmap: ";
+	case DEBUG_BLOCK_GROUP:
+		return "ext4_block_group: ";
+	case DEBUG_BLOCKDEV:
+		return "ext4_blockdev: ";
+	case DEBUG_DIR_IDX:
+		return "ext4_dir_idx: ";
+	case DEBUG_DIR:
+		return "ext4_dir: ";
+	case DEBUG_EXTENT:
+		return "ext4_extent: ";
+	case DEBUG_FS:
+		return "ext4_fs: ";
+	case DEBUG_HASH:
+		return "ext4_hash: ";
+	case DEBUG_IALLOC:
+		return "ext4_ialloc: ";
+	case DEBUG_INODE:
+		return "ext4_inode: ";
+	case DEBUG_SUPER:
+		return "ext4_super: ";
+	case DEBUG_MKFS:
+		return "ext4_mkfs: ";
+	case DEBUG_JBD:
+		return "ext4_jbd: ";
+	case DEBUG_MBR:
+		return "ext4_mbr: ";
+	case DEBUG_EXT4:
+		return "ext4: ";
+	}
+	return "";	/* no case matched: DEBUG_XATTR, DEBUG_NOPREFIX, DEBUG_ALL and combined masks get an empty prefix */
+}
+#define DBG_NONE  ""
+#define DBG_INFO  "[info]  "
+#define DBG_WARN  "[warn]  "
+#define DBG_ERROR "[error] "
+
+/**@brief   Global mask debug set.
+ * @param   m new debug mask.*/
+void ext4_dmask_set(u32int m);
+
+/**@brief   Global mask debug clear.
+ * @param   m debug mask bits to clear.*/
+void ext4_dmask_clr(u32int m);
+
+/**@brief   Global debug mask get.
+ * @return  debug mask*/
+u32int ext4_dmask_get(void);
+
+/**@brief   Debug printf to fd 2 when m intersects the global mask; prefixed with __func__ and the module name unless m has DEBUG_NOPREFIX. m is evaluated more than once.*/
+#define ext4_dbg(m, ...) \
+	do { \
+		if ((m) & ext4_dmask_get()) { \
+			if (!((m) & DEBUG_NOPREFIX)) { \
+				fprint(2, "%s: %s", __func__, ext4_dmask_id2str(m)); \
+			} \
+			fprint(2, __VA_ARGS__); \
+		} \
+	} while (0)
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_dir.h
@@ -1,0 +1,243 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_blockdev.h"
+#include "ext4_super.h"
+
+struct ext4_dir_iter {	/* state for the ext4_dir_iterator_init/next/fini functions */
+	struct ext4_inode_ref *inode_ref;	/* directory i-node handed to ext4_dir_iterator_init() */
+	struct ext4_block curr_blk;	/* NOTE(review): presumably the block holding curr - confirm */
+	u64int curr_off;	/* NOTE(review): presumably the current position in the directory - confirm */
+	struct ext4_dir_en *curr;	/* NOTE(review): presumably the current entry - confirm */
+};
+
+struct ext4_dir_search_result {	/* filled by ext4_dir_find_entry(), released by ext4_dir_destroy_result() */
+	struct ext4_block block;	/* NOTE(review): presumably the block containing dentry - confirm */
+	struct ext4_dir_en *dentry;	/* the directory entry that was found */
+};
+
+
+/**@brief Get i-node number from directory entry.
+ * @param de Directory entry
+ * @return I-node number (de->inode passed through to_le32())
+ */
+static inline u32int
+ext4_dir_en_get_inode(struct ext4_dir_en *de)
+{
+	return to_le32(de->inode);
+}
+
+/**@brief Set i-node number to directory entry.
+ * @param de Directory entry
+ * @param inode I-node number (stored in de->inode via to_le32())
+ */
+static inline void
+ext4_dir_en_set_inode(struct ext4_dir_en *de, u32int inode)
+{
+	de->inode = to_le32(inode);
+}
+
+/**@brief Set i-node number to directory entry. (For HTree root)
+ * @param de Dot entry of the directory index (struct ext4_dir_idx_dot_en)
+ * @param inode I-node number (stored in de->inode via to_le32())
+ */
+static inline void
+ext4_dx_dot_en_set_inode(struct ext4_dir_idx_dot_en *de, u32int inode)
+{
+	de->inode = to_le32(inode);
+}
+
+/**@brief Get directory entry length.
+ * @param de Directory entry
+ * @return Entry length (de->entry_len passed through to_le16())
+ */
+static inline u16int ext4_dir_en_get_entry_len(struct ext4_dir_en *de)
+{
+	return to_le16(de->entry_len);
+}
+
+/**@brief Set directory entry length.
+ * @param de Directory entry
+ * @param l  Entry length (stored in de->entry_len via to_le16())
+ */
+static inline void ext4_dir_en_set_entry_len(struct ext4_dir_en *de, u16int l)
+{
+	de->entry_len = to_le16(l);
+}
+
+/**@brief Get directory entry name length.
+ * @param sb Superblock
+ * @param de Directory entry
+ * @return Entry name length
+ */
+static inline u16int ext4_dir_en_get_name_len(struct ext4_sblock *sb,
+						struct ext4_dir_en *de)
+{
+	u16int v = de->name_len;
+
+	if ((ext4_get32(sb, rev_level) == 0) &&	/* only for rev 0 with minor rev < 5: */
+	    (ext4_get32(sb, minor_rev_level) < 5))
+		v |= ((u16int)de->in.name_length_high) << 8;	/* in.name_length_high supplies bits 8..15 */
+
+	return v;
+}
+
+/**@brief Set directory entry name length.
+ * @param sb  Superblock
+ * @param de  Directory entry
+ * @param len Entry name length
+ */
+static inline void ext4_dir_en_set_name_len(struct ext4_sblock *sb,
+					    struct ext4_dir_en *de,
+					    u16int len)
+{
+	de->name_len = (len << 8) >> 8;	/* NOTE(review): after integer promotion this equals len; only the width of name_len truncates it - confirm field type */
+
+	if ((ext4_get32(sb, rev_level) == 0) &&
+	    (ext4_get32(sb, minor_rev_level) < 5))
+		de->in.name_length_high = len >> 8;
+}
+
+/**@brief Get i-node type of directory entry.
+ * @param sb Superblock
+ * @param de Directory entry
+ * @return I-node type (file, dir, etc.), or EXT4_DE_UNKNOWN for rev 0 with minor rev < 5
+ */
+static inline u8int ext4_dir_en_get_inode_type(struct ext4_sblock *sb,
+						 struct ext4_dir_en *de)
+{
+	if ((ext4_get32(sb, rev_level) > 0) ||
+	    (ext4_get32(sb, minor_rev_level) >= 5))
+		return de->in.inode_type;
+
+	return EXT4_DE_UNKNOWN;
+}
+/**@brief Set i-node type of directory entry (no-op for rev 0 with minor rev < 5).
+ * @param sb Superblock
+ * @param de Directory entry
+ * @param t  I-node type (file, dir, etc.)
+ */
+
+static inline void ext4_dir_en_set_inode_type(struct ext4_sblock *sb,
+					      struct ext4_dir_en *de, u8int t)
+{
+	if ((ext4_get32(sb, rev_level) > 0) ||
+	    (ext4_get32(sb, minor_rev_level) >= 5))
+		de->in.inode_type = t;
+}
+
+/**@brief Verify checksum of a linear directory leaf block
+ * @param inode_ref Directory i-node
+ * @param dirent    Linear directory leaf block
+ * @return true means the block passed checksum verification
+ */
+bool ext4_dir_csum_verify(struct ext4_inode_ref *inode_ref,
+			  struct ext4_dir_en *dirent);
+
+/**@brief Initialize directory iterator.
+ * Set position to the first valid entry from the required position.
+ * @param it        Pointer to iterator to be initialized
+ * @param inode_ref Directory i-node
+ * @param pos       Position to start reading entries from
+ * @return Error code
+ */
+int ext4_dir_iterator_init(struct ext4_dir_iter *it,
+			   struct ext4_inode_ref *inode_ref, u64int pos);
+
+/**@brief Jump to the next valid entry
+ * @param it Initialized iterator
+ * @return Error code
+ */
+int ext4_dir_iterator_next(struct ext4_dir_iter *it);
+
+/**@brief Uninitialize directory iterator.
+ *        Release all allocated structures.
+ * @param it Iterator to be finished
+ * @return Error code
+ */
+int ext4_dir_iterator_fini(struct ext4_dir_iter *it);
+
+/**@brief Write directory entry to concrete data block.
+ * @param sb        Superblock
+ * @param en     Pointer to entry to be written
+ * @param entry_len Length of new entry
+ * @param child     Child i-node to be written to new entry
+ * @param name      Name of the new entry
+ * @param name_len  Length of entry name
+ */
+void ext4_dir_write_entry(struct ext4_sblock *sb, struct ext4_dir_en *en,
+			  u16int entry_len, struct ext4_inode_ref *child,
+			  const char *name, usize name_len);
+
+/**@brief Add new entry to the directory.
+ * @param parent Directory i-node
+ * @param name   Name of new entry
+ * @param child  I-node to be referenced from new entry
+ * @return Error code
+ */
+int ext4_dir_add_entry(struct ext4_inode_ref *parent, const char *name,
+		       u32int name_len, struct ext4_inode_ref *child);
+
+/**@brief Find directory entry with passed name.
+ * @param result Result structure to be returned if entry found
+ * @param parent Directory i-node
+ * @param name   Name of entry to be found
+ * @param name_len  Name length
+ * @return Error code
+ */
+int ext4_dir_find_entry(struct ext4_dir_search_result *result,
+			struct ext4_inode_ref *parent, const char *name,
+			u32int name_len);
+
+/**@brief Remove directory entry.
+ * @param parent Directory i-node
+ * @param name   Name of the entry to be removed
+ * @param name_len  Name length
+ * @return Error code
+ */
+int ext4_dir_remove_entry(struct ext4_inode_ref *parent, const char *name,
+			  u32int name_len);
+
+/**@brief Try to insert entry to concrete data block.
+ * @param sb           Superblock
+ * @param inode_ref    Directory i-node
+ * @param dst_blk      Block to try to insert entry to
+ * @param child        Child i-node to be inserted by new entry
+ * @param name         Name of the new entry
+ * @param name_len     Length of the new entry name
+ * @return Error code
+ */
+int ext4_dir_try_insert_entry(struct ext4_sblock *sb,
+			      struct ext4_inode_ref *inode_ref,
+			      struct ext4_block *dst_blk,
+			      struct ext4_inode_ref *child, const char *name,
+			      u32int name_len);
+
+/**@brief Try to find entry in block by name.
+ * @param block     Block containing entries
+ * @param sb        Superblock
+ * @param name_len  Length of entry name
+ * @param name      Name of entry to be found
+ * @param res_entry Output pointer to found entry, nil if not found
+ * @return Error code
+ */
+int ext4_dir_find_in_block(struct ext4_block *block, struct ext4_sblock *sb,
+			   usize name_len, const char *name,
+			   struct ext4_dir_en **res_entry);
+
+/**@brief Simple function to release allocated data from result.
+ * @param parent Parent inode
+ * @param result Search result to destroy
+ * @return Error code
+ *
+ */
+int ext4_dir_destroy_result(struct ext4_inode_ref *parent,
+			    struct ext4_dir_search_result *result);
+
+void ext4_dir_set_csum(struct ext4_inode_ref *inode_ref,
+		       struct ext4_dir_en *dirent);
+
+
+void ext4_dir_init_entry_tail(struct ext4_dir_entry_tail *t);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_dir_idx.h
@@ -1,0 +1,52 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_fs.h"
+#include "ext4_dir.h"
+
+struct ext4_dir_idx_block {
+	struct ext4_block b;	/* block descriptor for this index block */
+	struct ext4_dir_idx_entry *entries;	/* NOTE(review): presumably the first index entry in b - confirm */
+	struct ext4_dir_idx_entry *position;	/* NOTE(review): presumably the currently selected entry - confirm */
+};
+
+#define EXT4_DIR_DX_INIT_BCNT 2
+
+
+/**@brief Initialize index structure of new directory.
+ * @param dir Pointer to directory i-node
+ * @param parent Pointer to parent directory i-node
+ * @return Error code
+ */
+int ext4_dir_dx_init(struct ext4_inode_ref *dir,
+		     struct ext4_inode_ref *parent);
+
+/**@brief Try to find directory entry using directory index.
+ * @param result    Output value - if the entry is found,
+ *                  it is passed back through this parameter
+ * @param inode_ref Directory i-node
+ * @param name_len  Length of name to be found
+ * @param name      Name to be found
+ * @return Error code
+ */
+int ext4_dir_dx_find_entry(struct ext4_dir_search_result *result,
+			   struct ext4_inode_ref *inode_ref, usize name_len,
+			   const char *name);
+
+/**@brief Add new entry to indexed directory
+ * @param parent Directory i-node
+ * @param child  I-node to be referenced from directory entry
+ * @param name   Name of new directory entry
+ * @return Error code
+ */
+int ext4_dir_dx_add_entry(struct ext4_inode_ref *parent,
+			  struct ext4_inode_ref *child, const char *name, u32int name_len);
+
+/**@brief Reset parent i-node of indexed directory
+ * @param dir           Directory i-node
+ * @param parent_inode  parent inode index
+ * @return Error code
+ */
+int ext4_dir_dx_reset_parent_inode(struct ext4_inode_ref *dir,
+                                   u32int parent_inode);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_extent.h
@@ -1,0 +1,312 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_inode.h"
+
+/*
+ * Array of ext4_extent_path contains path to some extent.
+ * Creation/lookup routines use it for traversal/splitting/etc.
+ * Truncate uses it to simulate recursive walking.
+ */
+struct ext4_extent_path {
+	struct ext4_block block;
+	u16int depth;
+	struct ext4_extent_header *header;
+	struct ext4_extent_index *index;
+	struct ext4_extent *extent;
+};
+
+#define EXT4_EXT_UNWRITTEN_MASK (1L << 15)
+/* A stored length above EXT4_EXT_MAX_LEN_WRITTEN marks an unwritten extent (see EXT4_EXT_IS_UNWRITTEN). */
+#define EXT4_EXT_MAX_LEN_WRITTEN (1L << 15)
+#define EXT4_EXT_MAX_LEN_UNWRITTEN \
+	(EXT4_EXT_MAX_LEN_WRITTEN - 1)
+/* Raw length accessors; GET_LEN_UNWRITTEN strips the mask bit from the stored value. */
+#define EXT4_EXT_GET_LEN(ex) to_le16((ex)->nblocks)
+#define EXT4_EXT_GET_LEN_UNWRITTEN(ex) \
+	(EXT4_EXT_GET_LEN(ex) & ~(EXT4_EXT_UNWRITTEN_MASK))
+#define EXT4_EXT_SET_LEN(ex, count) \
+	((ex)->nblocks = to_le16(count))
+/* State test and in-place toggles of the mask bit in the stored nblocks field. */
+#define EXT4_EXT_IS_UNWRITTEN(ex) \
+	(EXT4_EXT_GET_LEN(ex) > EXT4_EXT_MAX_LEN_WRITTEN)
+#define EXT4_EXT_SET_UNWRITTEN(ex) \
+	((ex)->nblocks |= to_le16(EXT4_EXT_UNWRITTEN_MASK))
+#define EXT4_EXT_SET_WRITTEN(ex) \
+	((ex)->nblocks &= ~(to_le16(EXT4_EXT_UNWRITTEN_MASK)))
+/* First entry slot, located directly after the extent header (viewed as an extent). */
+#define EXT4_EXTENT_FIRST(header)                                              \
+	((struct ext4_extent *)(((char *)(header)) +                           \
+				sizeof(struct ext4_extent_header)))
+/* Same address, viewed as an extent index entry. */
+#define EXT4_EXTENT_FIRST_INDEX(header)                                        \
+	((struct ext4_extent_index *)(((char *)(header)) +                     \
+				      sizeof(struct ext4_extent_header)))
+/* Last used extent slot; nentries is read through to_le16(), matching ext4_extent_header_get_nentries(). */
+#define EXT4_EXTENT_LAST(header)                                              \
+	(((struct ext4_extent *)(((char *)(header)) +                         \
+				 sizeof(struct ext4_extent_header))) +        \
+	 to_le16((header)->nentries) - 1)
+/* Last used index slot; nentries is read through to_le16(), matching ext4_extent_header_get_nentries(). */
+#define EXT4_EXTENT_LAST_INDEX(header)                                        \
+	(((struct ext4_extent_index *)(((char *)(header)) +                   \
+				       sizeof(struct ext4_extent_header))) +  \
+	 to_le16((header)->nentries) - 1)
+
+#define EXT4_EXTENT_SIZE sizeof(struct ext4_extent)
+#define EXT4_EXTENT_INDEX_SIZE sizeof(struct ext4_extent_index)
+/* Byte offset just past max_nentries extent-sized slots that follow the header. */
+#define EXT4_EXTENT_TAIL_OFFSET(hdr)                                           \
+	(sizeof(struct ext4_extent_header) +                                   \
+	 (sizeof(struct ext4_extent) * to_le16((hdr)->max_nentries)))
+/* True when iblock lies in [eiblock, eiblock + len - 1]; arguments are evaluated more than once. */
+#define EXT4_EXTENT_IN_RANGE(iblock, eiblock, len)	\
+	((iblock) >= (eiblock) && (iblock) <= (eiblock) + (len) - 1)
+/* All-ones 32-bit value. */
+#define EXT4_EXTENT_MAX_BLOCKS    ((u32int)(-1))
+
+/**@brief Get logical number of the first block covered by extent.
+ * @param extent Extent to load number from
+ * @return Logical number of the first block covered by extent */
+static inline u32int ext4_extent_get_iblock(struct ext4_extent *extent)
+{
+	return to_le32(extent->iblock);
+}
+
+/**@brief Set logical number of the first block covered by extent.
+ * @param extent Extent to set number to
+ * @param iblock Logical number of the first block (stored via to_le32()) */
+static inline void ext4_extent_set_iblock(struct ext4_extent *extent,
+					  ext4_lblk_t iblock)
+{
+	extent->iblock = to_le32(iblock);
+}
+
+/**@brief Get number of blocks covered by extent.
+ * @param extent Extent to load count from
+ * @return Block count; the unwritten marker bit is stripped for unwritten extents */
+static inline u16int ext4_extent_get_nblocks(struct ext4_extent *extent)
+{
+	u16int len = EXT4_EXT_GET_LEN(extent);
+	if (len > EXT4_EXT_MAX_LEN_WRITTEN)
+		len &= ~(EXT4_EXT_UNWRITTEN_MASK);
+	return len;
+}
+/**@brief Set number of blocks covered by extent.
+ * @param extent Extent to set count to
+ * @param count  Number of blocks covered by extent
+ * @param unwritten Whether to also set the unwritten marker bit */
+static inline void
+ext4_extent_set_nblocks(struct ext4_extent *extent,
+			      u16int count, bool unwritten)
+{
+	extent->nblocks = to_le16(count);
+	if (unwritten)
+		extent->nblocks |= to_le16(EXT4_EXT_UNWRITTEN_MASK);
+}
+
+/**@brief Get physical number of the first block covered by extent.
+ * @param extent Extent to load number from
+ * @return Physical block number: fblock_hi in bits 32..47, fblock_lo in bits 0..31 */
+static inline u64int ext4_extent_get_fblock(struct ext4_extent *extent)
+{
+	u64int hi = to_le16(extent->fblock_hi), lo = to_le32(extent->fblock_lo);
+	return (hi << 32) | lo;
+}
+
+
+/**@brief Set physical number of the first block covered by extent.
+ * @param extent Extent to set number to
+ * @param fblock Physical block number; bits 0..31 go to fblock_lo, bits 32..47 to fblock_hi */
+static inline void
+ext4_extent_set_fblock(struct ext4_extent *extent, u64int fblock)
+{
+	extent->fblock_hi = to_le16((u16int)(fblock >> 32));
+	extent->fblock_lo = to_le32((u32int)fblock);
+}
+
+
+/**@brief Get logical number of the first block covered by extent index.
+ * @param index Extent index to load number from
+ * @return Logical number of the first block covered by extent index */
+static inline u32int
+ext4_extent_index_get_iblock(struct ext4_extent_index *index)
+{
+	return to_le32(index->iblock);
+}
+
+/**@brief Set logical number of the first block covered by extent index.
+ * @param index  Extent index to set number to
+ * @param iblock Logical number of the first block (stored via to_le32()) */
+static inline void
+ext4_extent_index_set_iblock(struct ext4_extent_index *index,
+                             u32int iblock)
+{
+	index->iblock = to_le32(iblock);
+}
+
+/**@brief Get physical number of block where the child node is located.
+ * @param index Extent index to load number from
+ * @return Physical block number: fblock_hi in bits 32..47, fblock_lo in bits 0..31 */
+static inline u64int
+ext4_extent_index_get_fblock(struct ext4_extent_index *index)
+{
+	u64int hi = to_le16(index->fblock_hi), lo = to_le32(index->fblock_lo);
+	return (hi << 32) | lo;
+}
+
+/**@brief Set physical number of block where the child node is located.
+ * @param index  Extent index to set number to
+ * @param fblock Physical number of the block with child node */
+static inline void ext4_extent_index_set_fblock(struct ext4_extent_index *index,
+						u64int fblock)
+{
+	index->fblock_hi = to_le16((u16int)(fblock >> 32));
+	index->fblock_lo = to_le32((u32int)fblock);
+}
+
+/**@brief Get magic value from extent header.
+ * @param header Extent header to load value from
+ * @return Magic value of extent header (header->magic through to_le16()) */
+static inline u16int
+ext4_extent_header_get_magic(struct ext4_extent_header *header)
+{
+	return to_le16(header->magic);
+}
+
+/**@brief Set magic value to extent header.
+ * @param header Extent header to set value to
+ * @param magic  Magic value of extent header (stored via to_le16()) */
+static inline void ext4_extent_header_set_magic(struct ext4_extent_header *header,
+						u16int magic)
+{
+	header->magic = to_le16(magic);
+}
+
+/**@brief Get number of entries from extent header
+ * @param header Extent header to get value from
+ * @return Number of entries (header->nentries through to_le16()) */
+static inline u16int
+ext4_extent_header_get_nentries(struct ext4_extent_header *header)
+{
+	return to_le16(header->nentries);
+}
+
+/**@brief Set number of entries to extent header
+ * @param header Extent header to set value to
+ * @param count  Number of entries (stored in header->nentries via to_le16()) */
+static inline void
+ext4_extent_header_set_nentries(struct ext4_extent_header *header,
+				u16int count)
+{
+	header->nentries = to_le16(count);
+}
+
+/**@brief Get maximum number of entries from extent header
+ * @param header Extent header to get value from
+ * @return Maximum number of entries (header->max_nentries through to_le16()) */
+static inline u16int
+ext4_extent_header_get_max_nentries(struct ext4_extent_header *header)
+{
+	return to_le16(header->max_nentries);
+}
+
+/**@brief Set maximum number of entries to extent header
+ * @param header    Extent header to set value to
+ * @param max_count Maximum number of entries (stored via to_le16()) */
+static inline void
+ext4_extent_header_set_max_nentries(struct ext4_extent_header *header,
+					 u16int max_count)
+{
+	header->max_nentries = to_le16(max_count);
+}
+
+/**@brief Get depth of extent subtree.
+ * @param header Extent header to get value from
+ * @return Depth of extent subtree (header->depth through to_le16()) */
+static inline u16int
+ext4_extent_header_get_depth(struct ext4_extent_header *header)
+{
+	return to_le16(header->depth);
+}
+
+/**@brief Set depth of extent subtree.
+ * @param header Extent header to set value to
+ * @param depth  Depth of extent subtree (stored via to_le16()) */
+static inline void
+ext4_extent_header_set_depth(struct ext4_extent_header *header,
+			     u16int depth)
+{
+	header->depth = to_le16(depth);
+}
+
+/**@brief Get generation from extent header
+ * @param header Extent header to get value from
+ * @return Generation (header->generation through to_le32()) */
+static inline u32int
+ext4_extent_header_get_generation(struct ext4_extent_header *header)
+{
+	return to_le32(header->generation);
+}
+
+/**@brief Set generation to extent header
+ * @param header     Extent header to set value to
+ * @param generation Generation (stored via to_le32()) */
+static inline void
+ext4_extent_header_set_generation(struct ext4_extent_header *header,
+				       u32int generation)
+{
+	header->generation = to_le32(generation);
+}
+
+/******************************************************************************/
+
+/**@brief Write an empty extent root header into the i-node and mark the i-node reference dirty. */
+static inline void ext4_extent_tree_init(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_extent_header *root;
+	u16int capacity;
+
+	/* Extent slots that fit in EXT4_INODE_BLOCKS 32-bit words after the header */
+	capacity = (EXT4_INODE_BLOCKS * sizeof(u32int) -
+		    sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent);
+
+	root = ext4_inode_get_extent_header(inode_ref->inode);
+	ext4_extent_header_set_magic(root, EXT4_EXTENT_MAGIC);
+	ext4_extent_header_set_depth(root, 0);
+	ext4_extent_header_set_nentries(root, 0);
+	ext4_extent_header_set_max_nentries(root, capacity);
+	ext4_extent_header_set_generation(root, 0);
+	inode_ref->dirty = true;
+}
+
+
+
+/**@brief Extent-based blockmap manipulation
+ * @param inode_ref   I-node
+ * @param iblock      starting logical block of the inode
+ * @param max_nblocks maximum number of blocks to get from/allocate to blockmap
+ * @param resfblockp  return physical block address of the first block of an
+ * extent
+ * @param create      true if caller wants to insert mapping or convert
+ * unwritten mapping to written one
+ * @param resnblocksp return number of blocks in an extent (must be smaller than
+ * \p max_nblocks)
+ * @return Error code*/
+int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref,
+			   ext4_lblk_t iblock,
+			   ext4_lblk_t max_nblocks,
+			   ext4_fsblk_t *resfblockp,
+			   bool create,
+			   ext4_lblk_t *resnblocksp);
+
+
+/**@brief Release all data blocks starting from specified logical block.
+ * @param inode_ref   I-node to release blocks from
+ * @param from        First logical block to release
+ * @return Error code */
+int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref,
+			     ext4_lblk_t from,
+			     ext4_lblk_t to);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_fs.h
@@ -1,0 +1,222 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+
+struct ext4_fs {	/* per-filesystem state, initialized by ext4_fs_init() */
+	bool read_only;	/* read-only flag (see ext4_fs_init(), ext4_fs_check_features()) */
+
+	struct ext4_blockdev *bdev;	/* presumably the device holding the filesystem — confirm */
+	struct ext4_sblock sb;	/* superblock, stored by value */
+
+	u64int inode_block_limits[4];	/* NOTE(review): looks like per-level limits of the indirect block map — confirm */
+	u64int inode_blocks_per_level[4];
+
+	u32int last_inode_bg_id;
+
+	struct jbd_fs *jbd_fs;
+	struct jbd_journal *jbd_journal;
+	struct jbd_trans *curr_trans;
+};
+
+struct ext4_block_group_ref {	/* see ext4_fs_get_block_group_ref() / ext4_fs_put_block_group_ref() */
+	struct ext4_block block;
+	struct ext4_bgroup *block_group;
+	struct ext4_fs *fs;
+	u32int index;	/* presumably the block group id (bgid) — confirm */
+	bool dirty;	/* NOTE(review): looks like a "modified" marker — confirm how put_block_group_ref uses it */
+};
+
+struct ext4_inode_ref {	/* see ext4_fs_get_inode_ref() / ext4_fs_put_inode_ref() */
+	struct ext4_block block;
+	struct ext4_inode *inode;	/* the i-node itself; accessed through the ext4_inode_*() helpers */
+	struct ext4_fs *fs;
+	u32int index;	/* presumably the i-node index passed to ext4_fs_get_inode_ref() — confirm */
+	bool dirty;	/* set to true by code that modifies the i-node */
+};
+
+#pragma incomplete struct ext4_fs
+
+/**@brief Convert block address to relative index in block group.
+ * @param s Superblock pointer
+ * @param baddr Block number to convert
+ * @return Block index relative to the start of its block group
+ */
+static inline u32int ext4_fs_addr_to_idx_bg(struct ext4_sblock *s,
+						     ext4_fsblk_t baddr)
+{
+	/* Non-zero addresses are shifted down by one when first_data_block != 0 */
+	if (baddr != 0 && ext4_get32(s, first_data_block) != 0)
+		baddr -= 1;
+	return baddr % ext4_get32(s, blocks_per_group);
+}
+
+/**@brief Convert relative block address in group to absolute address.
+ * @param s Superblock pointer
+ * @param index Relative block address
+ * @param bgid Block group
+ * @return Absolute block address (64-bit)
+ */
+static inline ext4_fsblk_t ext4_fs_bg_idx_to_addr(struct ext4_sblock *s,
+						     u32int index,
+						     u32int bgid)
+{
+	if (ext4_get32(s, first_data_block))
+		index++;
+	/* Widen before multiplying: blocks_per_group * bgid can exceed 32 bits */
+	return (u64int)ext4_get32(s, blocks_per_group) * bgid + index;
+}
+
+/**@brief Address of the first block of group @p bgid (first_data_block included). */
+static inline ext4_fsblk_t ext4_fs_first_bg_block_no(struct ext4_sblock *s,
+						 u32int bgid)
+{
+	u64int group_start = (u64int)bgid * ext4_get32(s, blocks_per_group);
+	return group_start + ext4_get32(s, first_data_block);
+}
+
+/**@brief Initialize filesystem and read all needed data.
+ * @param fs Filesystem instance to be initialized
+ * @param bdev Identifier of the device with the filesystem
+ * @param read_only Mark the filesystem as read-only.
+ * @return Error code
+ */
+int ext4_fs_init(struct ext4_fs *fs, struct ext4_blockdev *bdev,
+		 bool read_only);
+
+/**@brief Destroy filesystem instance (used by unmount operation).
+ * @param fs Filesystem to be destroyed
+ * @return Error code
+ */
+int ext4_fs_fini(struct ext4_fs *fs);
+
+/**@brief Check whether the filesystem's features are supported by this driver.
+ * The function can return 0 and still set the read_only flag. This means
+ * that some features are not supported and could cause problems
+ * during write operations.
+ * @param fs        Filesystem to be checked
+ * @param read_only Flag if filesystem should be mounted only for reading
+ * @return Error code
+ */
+int ext4_fs_check_features(struct ext4_fs *fs, bool *read_only);
+
+/**@brief Get reference to block group specified by index.
+ * @param fs   Filesystem to find block group on
+ * @param bgid Index of block group to load
+ * @param ref  Output pointer for reference
+ * @return Error code
+ */
+int ext4_fs_get_block_group_ref(struct ext4_fs *fs, u32int bgid,
+				struct ext4_block_group_ref *ref);
+
+/**@brief Put reference to block group.
+ * @param ref Pointer for reference to be put back
+ * @return Error code
+ */
+int ext4_fs_put_block_group_ref(struct ext4_block_group_ref *ref);
+
+/**@brief Get reference to i-node specified by index.
+ * @param fs    Filesystem to find i-node on
+ * @param index Index of i-node to load
+ * @param ref   Output pointer for reference
+ * @return Error code
+ */
+int ext4_fs_get_inode_ref(struct ext4_fs *fs, u32int index,
+			  struct ext4_inode_ref *ref);
+
+/**@brief Reset blocks field of i-node.
+ * @param fs        Filesystem the i-node belongs to
+ * @param inode_ref Reference to the i-node to be operated on
+ */
+void ext4_fs_inode_blocks_init(struct ext4_fs *fs,
+			       struct ext4_inode_ref *inode_ref);
+
+/**@brief Put reference to i-node.
+ * @param ref Pointer for reference to be put back
+ * @return Error code
+ */
+int ext4_fs_put_inode_ref(struct ext4_inode_ref *ref);
+
+/**@brief Convert filetype to inode mode.
+ * @param filetype
+ * @return inode mode
+ */
+u32int ext4_fs_correspond_inode_mode(int filetype);
+
+/**@brief Allocate new i-node in the filesystem.
+ * @param fs        Filesystem to allocate the i-node on
+ * @param inode_ref Output pointer to return reference to allocated i-node
+ * @param filetype  File type of newly created i-node
+ * @return Error code
+ */
+int ext4_fs_alloc_inode(struct ext4_fs *fs, struct ext4_inode_ref *inode_ref,
+			int filetype);
+
+/**@brief Release i-node and mark it as free.
+ * @param inode_ref I-node to be released
+ * @return Error code
+ */
+int ext4_fs_free_inode(struct ext4_inode_ref *inode_ref);
+
+/**@brief Truncate i-node data blocks.
+ * @param inode_ref I-node to be truncated
+ * @param new_size  New size of inode (must be < current size)
+ * @return Error code
+ */
+int ext4_fs_truncate_inode(struct ext4_inode_ref *inode_ref, u64int new_size);
+
+/**@brief Compute 'goal' for inode index
+ * @param inode_ref Reference to inode, to allocate block for
+ * @return goal
+ */
+ext4_fsblk_t ext4_fs_inode_to_goal_block(struct ext4_inode_ref *inode_ref);
+
+/**@brief Compute 'goal' for allocation algorithm (For blockmap).
+ * @param inode_ref Reference to inode, to allocate block for
+ * @param goal
+ * @return error code
+ */
+int ext4_fs_indirect_find_goal(struct ext4_inode_ref *inode_ref,
+				ext4_fsblk_t *goal);
+
+/**@brief Get physical block address by logical index of the block.
+ * @param inode_ref I-node to read block address from
+ * @param iblock            Logical index of block
+ * @param fblock            Output pointer for return physical
+ *                          block address
+ * @param support_unwritten Indicate whether unwritten block range
+ *                          is supported under the current context
+ * @return Error code
+ */
+int ext4_fs_get_inode_dblk_idx(struct ext4_inode_ref *inode_ref,
+				 ext4_lblk_t iblock, ext4_fsblk_t *fblock,
+				 bool support_unwritten);
+
+/**@brief Initialize a part of unwritten range of the inode.
+ * @param inode_ref I-node to proceed on.
+ * @param iblock    Logical index of block
+ * @param fblock    Output pointer for return physical block address
+ * @return Error code
+ */
+int ext4_fs_init_inode_dblk_idx(struct ext4_inode_ref *inode_ref,
+				  ext4_lblk_t iblock, ext4_fsblk_t *fblock);
+
+/**@brief Append following logical block to the i-node.
+ * @param inode_ref I-node to append block to
+ * @param fblock    Output physical block address of newly allocated block
+ * @param iblock    Output logical number of newly allocated block
+ * @return Error code
+ */
+int ext4_fs_append_inode_dblk(struct ext4_inode_ref *inode_ref,
+			      ext4_fsblk_t *fblock, ext4_lblk_t *iblock);
+
+/**@brief   Increment inode link count.
+ * @param   inode none handle
+ */
+void ext4_fs_inode_links_count_inc(struct ext4_inode_ref *inode_ref);
+
+/**@brief   Decrement inode link count.
+ * @param   inode none handle
+ */
+void ext4_fs_inode_links_count_dec(struct ext4_inode_ref *inode_ref);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_hash.h
@@ -1,0 +1,22 @@
+#pragma once
+
+#include "ext4_config.h"
+
+struct ext4_hash_info {	/* NOTE(review): looks like the inputs/outputs of ext2_htree_hash() bundled together — confirm */
+	u32int hash;
+	u32int minor_hash;
+	u32int hash_version;
+	const u32int *seed;	/* not owned: points at caller-provided seed words */
+};
+
+/**@brief   Directory entry name hash function.
+ * @param   name entry name
+ * @param   len entry name length
+ * @param   hash_seed hash seed (from superblock)
+ * @param   hash_version hash version (from superblock)
+ * @param   hash_major output value
+ * @param   hash_minor output value
+ * @return  standard error code*/
+int ext2_htree_hash(const char *name, int len, const u32int *hash_seed,
+		    int hash_version, u32int *hash_major,
+		    u32int *hash_minor);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_ialloc.h
@@ -1,0 +1,29 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+
+/**@brief Calculate and set checksum of inode bitmap.
+ * @param sb superblock pointer.
+ * @param bg block group
+ * @param bitmap bitmap buffer
+ */
+void ext4_ialloc_set_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg,
+				 void *bitmap);
+
+/**@brief Free i-node number and modify filesystem data structures.
+ * @param fs     Filesystem, where the i-node is located
+ * @param index  Index of i-node to be released
+ * @param is_dir Flag telling whether the i-node is a directory or not
+ */
+int ext4_ialloc_free_inode(struct ext4_fs *fs, u32int index, bool is_dir);
+
+/**@brief I-node allocation algorithm.
+ * This is a simpler algorithm than the Orlov allocator used
+ * in the Linux kernel.
+ * @param fs     Filesystem to allocate i-node on
+ * @param index  Output value - allocated i-node number
+ * @param is_dir Flag if allocated i-node will be file or directory
+ * @return Error code
+ */
+int ext4_ialloc_alloc_inode(struct ext4_fs *fs, u32int *index, bool is_dir);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_inode.h
@@ -1,0 +1,304 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+
+#pragma incomplete struct ext4_extent_header
+
+/**@brief Get mode of the i-node.
+ * @param sb    Superblock
+ * @param inode I-node to load mode from
+ * @return Mode of the i-node
+ */
+u32int ext4_inode_get_mode(struct ext4_sblock *sb, struct ext4_inode *inode);
+
+/**@brief Set mode of the i-node.
+ * @param sb    Superblock
+ * @param inode I-node to set mode to
+ * @param mode  Mode to set to i-node
+ */
+void ext4_inode_set_mode(struct ext4_sblock *sb, struct ext4_inode *inode,
+			 u32int mode);
+
+/**@brief Get ID of the i-node owner (user id).
+ * @param inode I-node to load uid from
+ * @return User ID of the i-node owner
+ */
+u32int ext4_inode_get_uid(struct ext4_inode *inode);
+
+/**@brief Set ID of the i-node owner.
+ * @param inode I-node to set uid to
+ * @param uid   ID of the i-node owner
+ */
+void ext4_inode_set_uid(struct ext4_inode *inode, u32int uid);
+
+/**@brief Get real i-node size.
+ * @param sb    Superblock
+ * @param inode I-node to load size from
+ * @return Real size of i-node
+ */
+u64int ext4_inode_get_size(struct ext4_sblock *sb, struct ext4_inode *inode);
+
+/**@brief Set real i-node size.
+ * @param inode I-node to set size to
+ * @param size  Size of the i-node
+ */
+void ext4_inode_set_size(struct ext4_inode *inode, u64int size);
+
+/**@brief Get time, when i-node was last accessed.
+ * @param inode I-node
+ * @return Time of the last access (POSIX)
+ */
+u32int ext4_inode_get_access_time(struct ext4_inode *inode);
+
+/**@brief Set time, when i-node was last accessed.
+ * @param inode I-node
+ * @param time  Time of the last access (POSIX)
+ */
+void ext4_inode_set_access_time(struct ext4_inode *inode, u32int time);
+
+/**@brief Get time, when i-node was last changed.
+ * @param inode I-node
+ * @return Time of the last change (POSIX)
+ */
+u32int ext4_inode_get_change_inode_time(struct ext4_inode *inode);
+
+/**@brief Set time, when i-node was last changed.
+ * @param inode I-node
+ * @param time  Time of the last change (POSIX)
+ */
+void ext4_inode_set_change_inode_time(struct ext4_inode *inode, u32int time);
+
+/**@brief Get time, when i-node content was last modified.
+ * @param inode I-node
+ * @return Time of the last content modification (POSIX)
+ */
+u32int ext4_inode_get_modif_time(struct ext4_inode *inode);
+
+/**@brief Set time, when i-node content was last modified.
+ * @param inode I-node
+ * @param time  Time of the last content modification (POSIX)
+ */
+void ext4_inode_set_modif_time(struct ext4_inode *inode, u32int time);
+
+/**@brief Get time, when i-node was deleted.
+ * @param inode I-node
+ * @return Time of the delete action (POSIX)
+ */
+u32int ext4_inode_get_del_time(struct ext4_inode *inode);
+
+/**@brief Get time, when i-node was created.
+ * @param inode I-node
+ * @return Time of the create action (POSIX)
+ */
+u32int ext4_inode_get_creation_time(struct ext4_inode *inode);
+
+/**@brief Set time, when i-node was deleted.
+ * @param inode I-node
+ * @param time  Time of the delete action (POSIX)
+ */
+void ext4_inode_set_del_time(struct ext4_inode *inode, u32int time);
+
+/**@brief Get ID of the i-node owner's group.
+ * @param inode I-node to load gid from
+ * @return Group ID of the i-node owner
+ */
+u32int ext4_inode_get_gid(struct ext4_inode *inode);
+
+/**@brief Set ID to the i-node owner's group.
+ * @param inode I-node to set gid to
+ * @param gid   Group ID of the i-node owner
+ */
+void ext4_inode_set_gid(struct ext4_inode *inode, u32int gid);
+
+/**@brief Get number of links to i-node.
+ * @param inode I-node to load number of links from
+ * @return Number of links to i-node
+ */
+u16int ext4_inode_get_links_cnt(struct ext4_inode *inode);
+
+/**@brief Set number of links to i-node.
+ * @param inode I-node to set number of links to
+ * @param cnt   Number of links to i-node
+ */
+void ext4_inode_set_links_cnt(struct ext4_inode *inode, u16int cnt);
+
+/**@brief Get number of 512-bytes blocks used for i-node.
+ * @param sb    Superblock
+ * @param inode I-node
+ * @return Number of 512-bytes blocks
+ */
+u64int ext4_inode_get_blocks_count(struct ext4_sblock *sb,
+				     struct ext4_inode *inode);
+
+/**@brief Set number of 512-bytes blocks used for i-node.
+ * @param sb    Superblock
+ * @param inode I-node
+ * @param cnt   Number of 512-bytes blocks
+ * @return Error code
+ */
+int ext4_inode_set_blocks_count(struct ext4_sblock *sb,
+				struct ext4_inode *inode, u64int cnt);
+
+/**@brief Get flags (features) of i-node.
+ * @param inode I-node to get flags from
+ * @return Flags (bitmap)
+ */
+u32int ext4_inode_get_flags(struct ext4_inode *inode);
+
+/**@brief Set flags (features) of i-node.
+ * @param inode I-node to set flags to
+ * @param flags Flags to set to i-node
+ */
+void ext4_inode_set_flags(struct ext4_inode *inode, u32int flags);
+
+/**@brief Get file generation (used by NFS).
+ * @param inode I-node
+ * @return File generation
+ */
+u32int ext4_inode_get_generation(struct ext4_inode *inode);
+
+/**@brief Set file generation (used by NFS).
+ * @param inode      I-node
+ * @param gen        File generation
+ */
+void ext4_inode_set_generation(struct ext4_inode *inode, u32int gen);
+
+/**@brief Get extra I-node size field.
+ * @param sb         Superblock
+ * @param inode      I-node
+ * @return extra I-node size
+ */
+u16int ext4_inode_get_extra_isize(struct ext4_sblock *sb,
+				    struct ext4_inode *inode);
+
+/**@brief Set extra I-node size field.
+ * @param sb         Superblock
+ * @param inode      I-node
+ * @param size       extra I-node size
+ */
+void ext4_inode_set_extra_isize(struct ext4_sblock *sb,
+				struct ext4_inode *inode,
+				u16int size);
+
+/**@brief Get address of block, where are extended attributes located.
+ * @param inode I-node
+ * @param sb    Superblock
+ * @return Block address
+ */
+u64int ext4_inode_get_file_acl(struct ext4_inode *inode,
+				 struct ext4_sblock *sb);
+
+/**@brief Set address of block, where are extended attributes located.
+ * @param inode    I-node
+ * @param sb       Superblock
+ * @param acl      Block address
+ */
+void ext4_inode_set_file_acl(struct ext4_inode *inode, struct ext4_sblock *sb,
+			     u64int acl);
+
+/**@brief Get block address of specified direct block.
+ * @param inode I-node to load block from
+ * @param idx   Index of logical block
+ * @return Physical block address
+ */
+u32int ext4_inode_get_direct_block(struct ext4_inode *inode, u32int idx);
+
+/**@brief Set block address of specified direct block.
+ * @param inode  I-node to set block address to
+ * @param idx    Index of logical block
+ * @param block  Physical block address
+ */
+void ext4_inode_set_direct_block(struct ext4_inode *inode, u32int idx,
+				 u32int block);
+
+/**@brief Get block address of specified indirect block.
+ * @param inode I-node to get block address from
+ * @param idx   Index of indirect block
+ * @return Physical block address
+ */
+u32int ext4_inode_get_indirect_block(struct ext4_inode *inode, u32int idx);
+
+/**@brief Set block address of specified indirect block.
+ * @param inode  I-node to set block address to
+ * @param idx    Index of indirect block
+ * @param block  Physical block address
+ */
+void ext4_inode_set_indirect_block(struct ext4_inode *inode, u32int idx,
+				   u32int block);
+
+/**@brief Get device number
+ * @param inode  I-node to get device number from
+ * @return Device number
+ */
+u32int ext4_inode_get_dev(struct ext4_inode *inode);
+
+/**@brief Set device number
+ * @param inode  I-node to set device number to
+ * @param dev    Device number
+ */
+void ext4_inode_set_dev(struct ext4_inode *inode, u32int dev);
+
+/**@brief return the type of i-node
+ * @param sb    Superblock
+ * @param inode I-node to return the type of
+ * @return Type of the i-node
+ */
+u32int ext4_inode_type(struct ext4_sblock *sb, struct ext4_inode *inode);
+
+/**@brief Check if i-node has specified type.
+ * @param sb    Superblock
+ * @param inode I-node to check type of
+ * @param type  Type to check
+ * @return Result of check operation
+ */
+bool ext4_inode_is_type(struct ext4_sblock *sb, struct ext4_inode *inode,
+			u32int type);
+
+/**@brief Check if i-node has specified flag.
+ * @param inode I-node to check flags of
+ * @param f     Flag to check
+ * @return Result of check operation
+ */
+bool ext4_inode_has_flag(struct ext4_inode *inode, u32int f);
+
+/**@brief Remove specified flag from i-node.
+ * @param inode      I-node to clear flag on
+ * @param f          Flag to be cleared
+ */
+void ext4_inode_clear_flag(struct ext4_inode *inode, u32int f);
+
+/**@brief Set specified flag to i-node.
+ * @param inode    I-node to set flag on
+ * @param f        Flag to be set
+ */
+void ext4_inode_set_flag(struct ext4_inode *inode, u32int f);
+
+/**@brief Get inode checksum(crc32)
+ * @param sb    Superblock
+ * @param inode I-node to get checksum value from
+ */
+u32int
+ext4_inode_get_csum(struct ext4_sblock *sb, struct ext4_inode *inode);
+
+/**@brief Set inode checksum(crc32)
+ * @param sb       Superblock
+ * @param inode    I-node to set checksum value to
+ * @param checksum Checksum value to store */
+void
+ext4_inode_set_csum(struct ext4_sblock *sb, struct ext4_inode *inode,
+			u32int checksum);
+
+/**@brief Check if i-node can be truncated.
+ * @param sb    Superblock
+ * @param inode I-node to check
+ * @return Result of the check operation
+ */
+bool ext4_inode_can_truncate(struct ext4_sblock *sb, struct ext4_inode *inode);
+
+/**@brief Get extent header from the root of the extent tree.
+ * @param inode I-node to get extent header from
+ * @return Pointer to extent header of the root node
+ */
+struct ext4_extent_header *
+ext4_inode_get_extent_header(struct ext4_inode *inode);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_journal.h
@@ -1,0 +1,97 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "queue.h"
+#include "tree.h"
+
+struct jbd_fs {	/* see jbd_get_fs() / jbd_put_fs() */
+	struct ext4_blockdev *bdev;
+	struct ext4_inode_ref inode_ref;	/* presumably the journal's i-node (cf. jbd_inode_bmap()) — confirm */
+	struct jbd_sb sb;	/* struct jbd_sb, stored by value */
+
+	bool dirty;
+};
+
+struct jbd_buf {
+	u32int jbd_lba;
+	struct ext4_block block;
+	struct jbd_trans *trans;
+	struct jbd_block_rec *block_rec;
+	TAILQ_ENTRY(jbd_buf) buf_node;	/* presumably links into jbd_trans.buf_queue — confirm */
+	TAILQ_ENTRY(jbd_buf) dirty_buf_node;	/* presumably links into jbd_block_rec.dirty_buf_queue — confirm */
+};
+
+struct jbd_revoke_rec {
+	ext4_fsblk_t lba;	/* block address (cf. the lba argument of jbd_trans_revoke_block()) */
+	RB_ENTRY(jbd_revoke_rec) revoke_node;	/* presumably links into jbd_trans.revoke_root — confirm */
+};
+
+struct jbd_block_rec {
+	ext4_fsblk_t lba;
+	struct jbd_trans *trans;
+	RB_ENTRY(jbd_block_rec) block_rec_node;	/* presumably links into jbd_journal.block_rec_root — confirm */
+	LIST_ENTRY(jbd_block_rec) tbrec_node;	/* presumably links into jbd_trans.tbrec_list — confirm */
+	TAILQ_HEAD(jbd_buf_dirty, jbd_buf) dirty_buf_queue;	/* queue head over struct jbd_buf */
+};
+
+struct jbd_trans {	/* transaction object, as returned by jbd_journal_new_trans() */
+	u32int trans_id;
+
+	u32int start_iblock;
+	int alloc_blocks;
+	int data_cnt;
+	u32int data_csum;
+	int written_cnt;
+	int error;
+
+	struct jbd_journal *journal;	/* NOTE(review): presumably the journal passed to jbd_journal_new_trans() — confirm */
+
+	TAILQ_HEAD(jbd_trans_buf, jbd_buf) buf_queue;	/* queue head over struct jbd_buf */
+	RB_HEAD(jbd_revoke_tree, jbd_revoke_rec) revoke_root;	/* tree root over struct jbd_revoke_rec */
+	LIST_HEAD(jbd_trans_block_rec, jbd_block_rec) tbrec_list;	/* list head over struct jbd_block_rec */
+	TAILQ_ENTRY(jbd_trans) trans_node;	/* presumably links into jbd_journal.cp_queue — confirm */
+};
+
+struct jbd_journal {	/* see jbd_journal_start() / jbd_journal_stop() */
+	u32int first;
+	u32int start;
+	u32int last;
+	u32int trans_id;
+	u32int alloc_trans_id;
+
+	u32int block_size;
+
+	TAILQ_HEAD(jbd_cp_queue, jbd_trans) cp_queue;	/* queue head over struct jbd_trans */
+	RB_HEAD(jbd_block, jbd_block_rec) block_rec_root;	/* tree root over struct jbd_block_rec */
+
+	struct jbd_fs *jbd_fs;	/* NOTE(review): presumably the jbd_fs given to jbd_journal_start() — confirm */
+};
+
+int jbd_get_fs(struct ext4_fs *fs,
+	       struct jbd_fs *jbd_fs);
+int jbd_put_fs(struct jbd_fs *jbd_fs);
+int jbd_inode_bmap(struct jbd_fs *jbd_fs,
+		   ext4_lblk_t iblock,
+		   ext4_fsblk_t *fblock);
+int jbd_recover(struct jbd_fs *jbd_fs);
+int jbd_journal_start(struct jbd_fs *jbd_fs,
+		      struct jbd_journal *journal);
+int jbd_journal_stop(struct jbd_journal *journal);
+struct jbd_trans *
+jbd_journal_new_trans(struct jbd_journal *journal);
+int jbd_trans_set_block_dirty(struct jbd_trans *trans,
+			      struct ext4_block *block);
+int jbd_trans_revoke_block(struct jbd_trans *trans,
+			   ext4_fsblk_t lba);
+int jbd_trans_try_revoke_block(struct jbd_trans *trans,
+			       ext4_fsblk_t lba);
+void jbd_journal_free_trans(struct jbd_journal *journal,
+			    struct jbd_trans *trans,
+			    bool abort);
+int jbd_journal_commit_trans(struct jbd_journal *journal,
+			     struct jbd_trans *trans);
+void
+jbd_journal_purge_cp_trans(struct jbd_journal *journal,
+			   bool flush,
+			   bool once);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_mbr.h
@@ -1,0 +1,22 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_blockdev.h"
+
+/**@brief Master boot record block devices descriptor (see ext4_mbr_scan())*/
+struct ext4_mbr_bdevs {
+	struct ext4_blockdev partitions[4];	/* presumably one block device per MBR partition slot — confirm */
+};
+
+int ext4_mbr_scan(struct ext4_blockdev *parent, struct ext4_mbr_bdevs *bdevs);
+
+/**@brief Master boot record partitions (input of ext4_mbr_write())*/
+struct ext4_mbr_parts {
+
+	/**@brief Percentage division table, one entry per partition,
+	 *  e.g. {50, 20, 10, 20}.
+	 * The sum of all 4 elements must be <= 100*/
+	u8int division[4];
+};
+
+int ext4_mbr_write(struct ext4_blockdev *parent, struct ext4_mbr_parts *parts, u32int disk_id);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_misc.h
@@ -1,0 +1,87 @@
+#pragma once
+
+#define EXT4_DIV_ROUND_UP(x, y) (((x) + (y) - 1)/(y))	/* (x + y - 1) / y; evaluates y twice */
+#define EXT4_ALIGN(x, y) ((y) * EXT4_DIV_ROUND_UP((x), (y)))	/* y * EXT4_DIV_ROUND_UP(x, y); evaluates y three times */
+
+/****************************Endian conversion*****************/
+
+/* Reverse the byte order of a 64-bit value. */
+static inline u64int reorder64(u64int n)
+{
+	return  (n << 56) | (n >> 56) |
+		((n << 40) & 0x00ff000000000000ULL) |
+		((n << 24) & 0x0000ff0000000000ULL) |
+		((n << 8)  & 0x000000ff00000000ULL) |
+		((n >> 8)  & 0x00000000ff000000ULL) |
+		((n >> 24) & 0x0000000000ff0000ULL) |
+		((n >> 40) & 0x000000000000ff00ULL);
+}
+
+/* Reverse the byte order of a 32-bit value. */
+static inline u32int reorder32(u32int n)
+{
+	return  (n << 24) | (n >> 24) |
+		((n << 8) & 0x00ff0000) |
+		((n >> 8) & 0x0000ff00);
+}
+
+/* Reverse the byte order of a 16-bit value. */
+static inline u16int reorder16(u16int n)
+{
+	return (u16int)((n << 8) | (n >> 8));
+}
+
+#ifdef CONFIG_BIG_ENDIAN	/* to_leN/to_beN: a byte swap is its own inverse, so each macro converts in both directions */
+#define to_le64(_n) reorder64(_n)
+#define to_le32(_n) reorder32(_n)
+#define to_le16(_n) reorder16(_n)
+/* with CONFIG_BIG_ENDIAN, big-endian values are used unchanged */
+#define to_be64(_n) (_n)
+#define to_be32(_n) (_n)
+#define to_be16(_n) (_n)
+
+#else
+#define to_le64(_n) (_n)
+#define to_le32(_n) (_n)
+#define to_le16(_n) (_n)
+/* without CONFIG_BIG_ENDIAN, big-endian values are byte-swapped */
+#define to_be64(_n) reorder64(_n)
+#define to_be32(_n) reorder32(_n)
+#define to_be16(_n) reorder16(_n)
+#endif
+
+/****************************Access macros to ext4 structures*****************/
+
+#define ext4_get32(s, f) to_le32((s)->f)	/* read field f of *s through to_le32 */
+#define ext4_get16(s, f) to_le16((s)->f)	/* read field f of *s through to_le16 */
+#define ext4_get8(s, f) (s)->f	/* plain read, no conversion */
+/* Setters: store v into field f of *s after to_le32/to_le16 conversion. */
+#define ext4_set32(s, f, v) \
+	do { \
+		(s)->f = to_le32(v); \
+	} while (0)
+#define ext4_set16(s, f, v) \
+	do { \
+		(s)->f = to_le16(v); \
+	} while (0)
+#define ext4_set8 \
+	(s, f, v) do { (s)->f = (v); } \
+	while (0)
+
+/****************************Access macros to jbd2 structures*****************/
+
+#define jbd_get32(s, f) to_be32((s)->f)	/* read field f of *s through to_be32 */
+#define jbd_get16(s, f) to_be16((s)->f)	/* read field f of *s through to_be16 */
+#define jbd_get8(s, f) (s)->f	/* plain read, no conversion */
+/* Setters: store v into field f of *s after to_be32/to_be16 conversion. */
+#define jbd_set32(s, f, v) \
+	do { \
+		(s)->f = to_be32(v); \
+	} while (0)
+#define jbd_set16(s, f, v) \
+	do { \
+		(s)->f = to_be16(v); \
+	} while (0)
+#define jbd_set8 \
+	(s, f, v) do { (s)->f = (v); } \
+	while (0)
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_mkfs.h
@@ -1,0 +1,49 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_blockdev.h"
+#include "ext4_fs.h"
+
+struct ext4_mkfs_info {	/* parameter block taken by ext4_mkfs() and ext4_mkfs_read_info() */
+	u64int len;	/* NOTE(review): unit not visible here (bytes?) — confirm */
+	u32int block_size;
+	u32int blocks_per_group;
+	u32int inodes_per_group;
+	u32int inode_size;
+	u32int inodes;
+	u32int journal_blocks;
+	u32int feat_ro_compat;	/* feat_*: presumably superblock feature masks — confirm */
+	u32int feat_compat;
+	u32int feat_incompat;
+	u32int bg_desc_reserve_blocks;
+	u16int dsc_size;
+	u8int uuid[UUID_SIZE];	/* UUID_SIZE bytes */
+	bool journal;
+	char label[16];	/* NOTE(review): NUL termination not established here — confirm */
+};
+
+struct fs_aux_info {	/* see create_fs_aux_info() / release_fs_aux_info() */
+    struct ext4_sblock *sb;
+    u8int *bg_desc_blk;
+    struct xattr_list_element *xattrs;
+    u32int first_data_block;
+    u64int len_blocks;
+    u32int inode_table_blocks;
+    u32int groups;
+    u32int bg_desc_blocks;
+    u32int default_i_flags;
+    u32int blocks_per_ind;	/* blocks_per_{ind,dind,tind}: presumably single/double/triple indirect — confirm */
+    u32int blocks_per_dind;
+    u32int blocks_per_tind;
+};
+
+int create_fs_aux_info(struct fs_aux_info *aux_info, struct ext4_mkfs_info *info);
+void release_fs_aux_info(struct fs_aux_info *aux_info);
+
+int write_sblocks(struct ext4_blockdev *bd, struct fs_aux_info *aux_info, struct ext4_mkfs_info *info);
+
+int ext4_mkfs_read_info(struct ext4_blockdev *bd, struct ext4_mkfs_info *info);
+
+int ext4_mkfs(struct ext4_fs *fs, struct ext4_blockdev *bd,
+	      struct ext4_mkfs_info *info, int fs_type);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_super.h
@@ -1,0 +1,185 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+
+/**@brief   Read the 64-bit blocks count stored in the superblock.
+ * @param   s superblock descriptor
+ * @return  blocks_count_hi and blocks_count_lo combined into one value*/
+static inline u64int ext4_sb_get_blocks_cnt(struct ext4_sblock *s)
+{
+	u64int hi = to_le32(s->blocks_count_hi), lo = to_le32(s->blocks_count_lo);
+	return (hi << 32) | lo;
+}
+
+/**@brief   Store a 64-bit blocks count in the superblock.
+ * @param   s superblock descriptor
+ * @param   cnt new value, split into blocks_count_lo / blocks_count_hi*/
+static inline void ext4_sb_set_blocks_cnt(struct ext4_sblock *s, u64int cnt)
+{
+	s->blocks_count_hi = to_le32(cnt >> 32);
+	s->blocks_count_lo = to_le32(cnt & 0xffffffff);
+}
+
+/**@brief   Read the 64-bit free blocks count stored in the superblock.
+ * @param   s superblock descriptor
+ * @return  free_blocks_count_hi and free_blocks_count_lo combined*/
+static inline u64int ext4_sb_get_free_blocks_cnt(struct ext4_sblock *s)
+{
+	u64int cnt = to_le32(s->free_blocks_count_hi);
+	return (cnt << 32) | to_le32(s->free_blocks_count_lo);
+}
+
+/**@brief   Store a 64-bit free blocks count in the superblock.
+ * @param   s superblock descriptor
+ * @param   cnt new value of free blocks (split into lo/hi halves)*/
+static inline void ext4_sb_set_free_blocks_cnt(struct ext4_sblock *s,
+					       u64int cnt)
+{
+	s->free_blocks_count_hi = to_le32(cnt >> 32);
+	s->free_blocks_count_lo = to_le32(cnt & 0xffffffff);
+}
+
+/**@brief   Block size in bytes, as encoded in superblock @p s.
+ * @return  1024 << log_block_size*/
+static inline u32int ext4_sb_get_block_size(struct ext4_sblock *s)
+{
+	u32int shift = to_le32(s->log_block_size);
+	return 1024 << shift;
+}
+
+/**@brief   Block group descriptor size.
+ * @param   s superblock descriptor
+ * @return  descriptor size in bytes, never below
+ *          EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE*/
+static inline u16int ext4_sb_get_desc_size(struct ext4_sblock *s)
+{
+	u16int dsz = to_le16(s->desc_size);
+	if (dsz < EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		dsz = EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE;
+	return dsz;
+}
+
+/*************************Flags and features*********************************/
+
+/**@brief   Test whether any bit of @p v is set in the superblock flags.
+ * @param   s superblock descriptor
+ * @param   v flag mask to check
+ * @return  true if (flags & v) is non-zero, false otherwise*/
+static inline bool ext4_sb_check_flag(struct ext4_sblock *s, u32int v)
+{
+	return (to_le32(s->flags) & v) != 0;	/* explicit test: no reliance on u32int -> bool narrowing */
+}
+
+/**@brief   Test whether any bit of @p v is set in features_compatible.
+ * @param   s superblock descriptor
+ * @param   v feature mask to check
+ * @return  true if (features_compatible & v) is non-zero*/
+static inline bool ext4_sb_feature_com(struct ext4_sblock *s, u32int v)
+{
+	return (to_le32(s->features_compatible) & v) != 0;	/* explicit test: no reliance on u32int -> bool narrowing */
+}
+
+/**@brief   Test whether any bit of @p v is set in features_incompatible.
+ * @param   s superblock descriptor
+ * @param   v feature mask to check
+ * @return  true if (features_incompatible & v) is non-zero*/
+static inline bool ext4_sb_feature_incom(struct ext4_sblock *s, u32int v)
+{
+	return (to_le32(s->features_incompatible) & v) != 0;	/* explicit test: no reliance on u32int -> bool narrowing */
+}
+
+/**@brief   Test whether any bit of @p v is set in features_read_only.
+ * @param   s superblock descriptor
+ * @param   v feature mask to check
+ * @return  true if (features_read_only & v) is non-zero*/
+static inline bool ext4_sb_feature_ro_com(struct ext4_sblock *s, u32int v)
+{
+	return (to_le32(s->features_read_only) & v) != 0;	/* explicit test: no reliance on u32int -> bool narrowing */
+}
+
+/**@brief   Map a block group to its flex group.
+ * @param   s superblock descriptor
+ * @param   block_group block group
+ * @return  flex group id (block_group >> log_groups_per_flex)*/
+static inline u32int ext4_sb_bg_to_flex(struct ext4_sblock *s, u32int block_group)
+{
+	u32int shift = to_le32(s->log_groups_per_flex);
+	return block_group >> shift;
+}
+
+/**@brief   Flex block group size of superblock @p s.
+ * @return  1 << log_groups_per_flex*/
+static inline u32int ext4_sb_flex_bg_size(struct ext4_sblock *s)
+{
+	u32int shift = to_le32(s->log_groups_per_flex);
+	return 1 << shift;
+}
+
+/**@brief   First meta block group id recorded in superblock @p s.
+ * @return  value of the first_meta_bg field */
+static inline u32int ext4_sb_first_meta_bg(struct ext4_sblock *s)
+{
+	u32int id = to_le32(s->first_meta_bg);
+	return id;
+}
+
+/**************************More complex functions****************************/
+
+/**@brief   Returns a block group count.
+ * @param   s superblock descriptor
+ * @return  count of block groups*/
+u32int ext4_block_group_cnt(struct ext4_sblock *s);
+
+/**@brief   Returns block count in block group
+ *          (last block group may have less blocks)
+ * @param   s superblock descriptor
+ * @param   bgid block group id
+ * @return  blocks count*/
+u32int ext4_blocks_in_group_cnt(struct ext4_sblock *s, u32int bgid);
+
+/**@brief   Returns inodes count in block group
+ *          (last block group may have less inodes)
+ * @param   s superblock descriptor
+ * @param   bgid block group id
+ * @return  inodes count*/
+u32int ext4_inodes_in_group_cnt(struct ext4_sblock *s, u32int bgid);
+
+/***************************Read/write/check superblock**********************/
+
+/**@brief   Superblock write.
+ * @param   bdev block device descriptor.
+ * @param   s superblock descriptor
+ * @return  Standard error code */
+int ext4_sb_write(struct ext4_blockdev *bdev, struct ext4_sblock *s);
+
+/**@brief   Superblock read.
+ * @param   bdev block device descriptor.
+ * @param   s superblock descriptor
+ * @return  Standard error code */
+int ext4_sb_read(struct ext4_blockdev *bdev, struct ext4_sblock *s);
+
+/**@brief   Superblock simple validation.
+ * @param   s superblock descriptor
+ * @return  true if OK*/
+bool ext4_sb_check(struct ext4_sblock *s);
+
+/**@brief   Superblock presence in block group.
+ * @param   s superblock descriptor
+ * @param   block_group block group id
+ * @return  true if block group has superblock*/
+bool ext4_sb_is_super_in_bg(struct ext4_sblock *s, u32int block_group);
+
+/**@brief   TODO:*/
+bool ext4_sb_sparse(u32int group);
+
+/**@brief   TODO:*/
+u32int ext4_bg_num_gdb(struct ext4_sblock *s, u32int group);
+
+/**@brief   TODO:*/
+u32int ext4_num_base_meta_clusters(struct ext4_sblock *s,
+				     u32int block_group);
+
+/**@brief   TODO:*/
+void ext4_sb_set_csum(struct ext4_sblock *s);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_trans.h
@@ -1,0 +1,38 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+
+/**@brief   Marks a buffer dirty and adds it to the current transaction.
+ * @param   buf buffer
+ * @return  standard error code*/
+int ext4_trans_set_block_dirty(struct ext4_buf *buf);
+
+/**@brief   Gets a block through the cache without reading it.
+ *          jbd_trans_get_access would be called in order to
+ *          get write access to the buffer.
+ * @param   bdev block device descriptor
+ * @param   b block descriptor
+ * @param   lba logical block address
+ * @return  standard error code*/
+int ext4_trans_block_get_noread(struct ext4_blockdev *bdev,
+			  struct ext4_block *b,
+			  u64int lba);
+
+/**@brief   Gets a block through the cache.
+ *          jbd_trans_get_access would be called in order to
+ *          get write access to the buffer.
+ * @param   bdev block device descriptor
+ * @param   b block descriptor
+ * @param   lba logical block address
+ * @return  standard error code*/
+int ext4_trans_block_get(struct ext4_blockdev *bdev,
+		   struct ext4_block *b,
+		   u64int lba);
+
+/**@brief  Tries to add a block to be revoked to the current transaction.
+ * @param  bdev block device descriptor
+ * @param  lba logical block address of the block to revoke
+ * @return standard error code*/
+int ext4_trans_try_revoke_block(struct ext4_blockdev *bdev,
+			       u64int lba);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_types.h
@@ -1,0 +1,833 @@
+#pragma once
+
+#include "ext4_blockdev.h"
+#include "tree.h"
+
+/*
+ * Block number types.
+ */
+typedef u32int ext4_lblk_t; /* 32-bit; presumably a logical (file-relative) block number -- confirm */
+typedef u64int ext4_fsblk_t; /* 64-bit; presumably a file system (physical) block number -- confirm */
+
+#define EXT4_CHECKSUM_CRC32C 1 /* presumably a value for ext4_sblock.checksum_type -- confirm */
+
+#define UUID_SIZE 16 /* bytes in a 128-bit uuid; dimensions the uuid arrays in this file */
+
+#pragma pack on
+
+/*
+ * On-disk super block; packed, the fields below total 1024 bytes.
+ */
+struct ext4_sblock {
+	u32int inodes_count;		   /* I-nodes count */
+	u32int blocks_count_lo;	  /* Blocks count (low 32 bits, see blocks_count_hi) */
+	u32int reserved_blocks_count_lo; /* Reserved blocks count (low 32 bits) */
+	u32int free_blocks_count_lo;     /* Free blocks count (low 32 bits) */
+	u32int free_inodes_count;	/* Free inodes count */
+	u32int first_data_block;	 /* First Data Block */
+	u32int log_block_size;	   /* Block size */
+	u32int log_cluster_size;	 /* Obsoleted fragment size */
+	u32int blocks_per_group;	 /* Number of blocks per group */
+	u32int frags_per_group;	  /* Obsoleted fragments per group */
+	u32int inodes_per_group;	 /* Number of inodes per group */
+	u32int mount_time;		   /* Mount time */
+	u32int write_time;		   /* Write time */
+	u16int mount_count;		   /* Mount count */
+	u16int max_mount_count;	  /* Maximal mount count */
+	u16int magic;			   /* Magic signature */
+	u16int state;			   /* File system state */
+	u16int errors;		   /* Behavior when detecting errors */
+	u16int minor_rev_level;	  /* Minor revision level */
+	u32int last_check_time;	  /* Time of last check */
+	u32int check_interval;	   /* Maximum time between checks */
+	u32int creator_os;		   /* Creator OS */
+	u32int rev_level;		   /* Revision level */
+	u16int def_resuid;		   /* Default uid for reserved blocks */
+	u16int def_resgid;		   /* Default gid for reserved blocks */
+
+	/* Fields for EXT4_DYNAMIC_REV superblocks only. */
+	u32int first_inode;	 /* First non-reserved inode */
+	u16int inode_size;	  /* Size of inode structure */
+	u16int block_group_index;   /* Block group index of this superblock */
+	u32int features_compatible; /* Compatible feature set */
+	u32int features_incompatible;  /* Incompatible feature set */
+	u32int features_read_only;     /* Readonly-compatible feature set */
+	u8int uuid[UUID_SIZE];		 /* 128-bit uuid for volume */
+	char volume_name[16];		 /* Volume name */
+	char last_mounted[64];		 /* Directory where last mounted */
+	u32int algorithm_usage_bitmap; /* For compression */
+
+	/*
+	 * Performance hints. Directory preallocation should only
+	 * happen if the EXT4_FCOM_DIR_PREALLOC flag is on.
+	 */
+	u8int s_prealloc_blocks; /* Number of blocks to try to preallocate */
+	u8int s_prealloc_dir_blocks;  /* Number to preallocate for dirs */
+	u16int s_reserved_gdt_blocks; /* Per group desc for online growth */
+
+	/*
+	 * Journaling support valid if EXT4_FCOM_HAS_JOURNAL set.
+	 */
+	u8int journal_uuid[UUID_SIZE];      /* UUID of journal superblock */
+	u32int journal_inode_number; /* Inode number of journal file */
+	u32int journal_dev;	  /* Device number of journal file */
+	u32int last_orphan;	  /* Head of list of inodes to delete */
+	u32int hash_seed[4];	 /* HTREE hash seed */
+	u8int default_hash_version;  /* Default hash version to use */
+	u8int journal_backup_type;
+	u16int desc_size;	  /* Size of group descriptor */
+	u32int default_mount_opts; /* Default mount options */
+	u32int first_meta_bg;      /* First metablock block group */
+	u32int mkfs_time;	  /* When the filesystem was created */
+	u32int journal_blocks[17]; /* Backup of the journal inode */
+
+	/* 64bit support valid if EXT4_FINCOM_64BIT (an incompatible feature) */
+	u32int blocks_count_hi;	  /* Blocks count (high 32 bits) */
+	u32int reserved_blocks_count_hi; /* Reserved blocks count (high 32 bits) */
+	u32int free_blocks_count_hi;     /* Free blocks count (high 32 bits) */
+	u16int min_extra_isize;    /* All inodes have at least # bytes */
+	u16int want_extra_isize;   /* New inodes should reserve # bytes */
+	u32int flags;		     /* Miscellaneous flags */
+	u16int raid_stride;	/* RAID stride */
+	u16int mmp_interval;       /* # seconds to wait in MMP checking */
+	u64int mmp_block;	  /* Block for multi-mount protection */
+	u32int raid_stripe_width;  /* Blocks on all data disks (N * stride) */
+	u8int log_groups_per_flex; /* FLEX_BG group size */
+	u8int checksum_type;
+	u16int reserved_pad;
+	u64int kbytes_written; /* Number of lifetime kilobytes written */
+	u32int snapshot_inum;  /* I-node number of active snapshot */
+	u32int snapshot_id;    /* Sequential ID of active snapshot */
+	u64int
+	    snapshot_r_blocks_count; /* Reserved blocks for the active
+					snapshot's future use */
+	u32int
+	    snapshot_list; /* I-node number of the head of the on-disk
+			      snapshot list */
+	u32int error_count;	 /* Number of file system errors */
+	u32int first_error_time;    /* First time an error happened */
+	u32int first_error_ino;     /* I-node involved in first error */
+	u64int first_error_block;   /* Block involved in first error */
+	u8int first_error_func[32]; /* Function where the error happened */
+	u32int first_error_line;    /* Line number where error happened */
+	u32int last_error_time;     /* Most recent time of an error */
+	u32int last_error_ino;      /* I-node involved in last error */
+	u32int last_error_line;     /* Line number where error happened */
+	u64int last_error_block;    /* Block involved in last error */
+	u8int last_error_func[32];  /* Function where the error happened */
+	u8int mount_opts[64];
+	u32int usr_quota_inum;	/* inode for tracking user quota */
+	u32int grp_quota_inum;	/* inode for tracking group quota */
+	u32int overhead_clusters;	/* overhead blocks/clusters in fs */
+	u32int backup_bgs[2];	/* groups with sparse_super2 SBs */
+	u8int  encrypt_algos[4];	/* Encryption algorithms in use  */
+	u8int  encrypt_pw_salt[16];	/* Salt used for string2key algorithm */
+	u32int lpf_ino;		/* Location of the lost+found inode */
+	u32int padding[100];	/* Padding to the end of the block */
+	u32int checksum;		/* crc32c(superblock) */
+};
+
+#pragma pack off
+
+#define EXT4_SUPERBLOCK_MAGIC 0xEF53 /* presumably the expected ext4_sblock.magic -- confirm */
+#define EXT4_SUPERBLOCK_SIZE 1024 /* equals the packed size of struct ext4_sblock */
+#define EXT4_SUPERBLOCK_OFFSET 1024 /* presumably a byte offset on the device -- confirm */
+
+#define EXT4_SUPERBLOCK_OS_LINUX 0 /* presumably values for ext4_sblock.creator_os -- confirm */
+#define EXT4_SUPERBLOCK_OS_HURD 1
+
+/*
+ * Misc. filesystem flags (presumably bits of ext4_sblock.flags -- confirm)
+ */
+#define EXT4_SUPERBLOCK_FLAGS_SIGNED_HASH 0x0001
+#define EXT4_SUPERBLOCK_FLAGS_UNSIGNED_HASH 0x0002
+#define EXT4_SUPERBLOCK_FLAGS_TEST_FILESYS 0x0004
+/*
+ * Filesystem states (presumably bits of ext4_sblock.state -- confirm)
+ */
+#define EXT4_SUPERBLOCK_STATE_VALID_FS 0x0001  /* Unmounted cleanly */
+#define EXT4_SUPERBLOCK_STATE_ERROR_FS 0x0002  /* Errors detected */
+#define EXT4_SUPERBLOCK_STATE_ORPHAN_FS 0x0004 /* Orphans being recovered */
+
+/*
+ * Behaviour when errors are detected; DEFAULT aliases CONTINUE.
+ */
+#define EXT4_SUPERBLOCK_ERRORS_CONTINUE 1 /* Continue execution */
+#define EXT4_SUPERBLOCK_ERRORS_RO 2       /* Remount fs read-only */
+#define EXT4_SUPERBLOCK_ERRORS_PANIC 3    /* Panic */
+#define EXT4_SUPERBLOCK_ERRORS_DEFAULT EXT4_SUPERBLOCK_ERRORS_CONTINUE
+
+/*
+ * Compatible features (bits for ext4_sblock.features_compatible)
+ */
+#define EXT4_FCOM_DIR_PREALLOC 0x0001
+#define EXT4_FCOM_IMAGIC_INODES 0x0002
+#define EXT4_FCOM_HAS_JOURNAL 0x0004
+#define EXT4_FCOM_EXT_ATTR 0x0008
+#define EXT4_FCOM_RESIZE_INODE 0x0010
+#define EXT4_FCOM_DIR_INDEX 0x0020
+
+/*
+ * Read-only compatible features (bits for ext4_sblock.features_read_only)
+ */
+#define EXT4_FRO_COM_SPARSE_SUPER 0x0001
+#define EXT4_FRO_COM_LARGE_FILE 0x0002
+#define EXT4_FRO_COM_BTREE_DIR 0x0004
+#define EXT4_FRO_COM_HUGE_FILE 0x0008
+#define EXT4_FRO_COM_GDT_CSUM 0x0010
+#define EXT4_FRO_COM_DIR_NLINK 0x0020
+#define EXT4_FRO_COM_EXTRA_ISIZE 0x0040
+#define EXT4_FRO_COM_QUOTA 0x0100
+#define EXT4_FRO_COM_BIGALLOC 0x0200
+#define EXT4_FRO_COM_METADATA_CSUM 0x0400
+
+/*
+ * Incompatible features (bits for ext4_sblock.features_incompatible)
+ */
+#define EXT4_FINCOM_COMPRESSION 0x0001
+#define EXT4_FINCOM_FILETYPE 0x0002
+#define EXT4_FINCOM_RECOVER 0x0004     /* Needs recovery */
+#define EXT4_FINCOM_JOURNAL_DEV 0x0008 /* Journal device */
+#define EXT4_FINCOM_META_BG 0x0010
+#define EXT4_FINCOM_EXTENTS 0x0040 /* extents support */
+#define EXT4_FINCOM_64BIT 0x0080 /* gates the *_hi superblock fields */
+#define EXT4_FINCOM_MMP 0x0100 /* multi-mount protection */
+#define EXT4_FINCOM_FLEX_BG 0x0200
+#define EXT4_FINCOM_EA_INODE 0x0400	 /* EA in inode */
+#define EXT4_FINCOM_DIRDATA 0x1000	  /* data in dirent */
+#define EXT4_FINCOM_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */
+#define EXT4_FINCOM_LARGEDIR 0x4000	 /* >2GB or 3-lvl htree */
+#define EXT4_FINCOM_INLINE_DATA 0x8000      /* data in inode */
+
+/*
+ * EXT2 supported feature set (compatible / incompatible / read-only masks)
+ */
+#define EXT2_SUPPORTED_FCOM 0x0000
+
+#define EXT2_SUPPORTED_FINCOM                                   \
+	(EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG)
+
+#define EXT2_SUPPORTED_FRO_COM                                  \
+	(EXT4_FRO_COM_SPARSE_SUPER |                            \
+	 EXT4_FRO_COM_LARGE_FILE)
+
+/*
+ * EXT3 supported feature set (EXT2's masks plus EXT4_FCOM_DIR_INDEX)
+ */
+#define EXT3_SUPPORTED_FCOM (EXT4_FCOM_DIR_INDEX)
+
+#define EXT3_SUPPORTED_FINCOM                                 \
+	(EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG)
+
+#define EXT3_SUPPORTED_FRO_COM                                \
+	(EXT4_FRO_COM_SPARSE_SUPER | EXT4_FRO_COM_LARGE_FILE)
+
+/*
+ * EXT4 supported feature set (compatible / incompatible / read-only masks)
+ */
+#define EXT4_SUPPORTED_FCOM (EXT4_FCOM_DIR_INDEX)
+
+#define EXT4_SUPPORTED_FINCOM ( \
+	EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG |      \
+	EXT4_FINCOM_EXTENTS | EXT4_FINCOM_FLEX_BG |       \
+	EXT4_FINCOM_64BIT \
+)
+
+#define EXT4_SUPPORTED_FRO_COM ( \
+	EXT4_FRO_COM_SPARSE_SUPER |                       \
+	EXT4_FRO_COM_METADATA_CSUM |                      \
+	EXT4_FRO_COM_LARGE_FILE | EXT4_FRO_COM_GDT_CSUM | \
+	EXT4_FRO_COM_DIR_NLINK |                          \
+	EXT4_FRO_COM_EXTRA_ISIZE | EXT4_FRO_COM_HUGE_FILE \
+)
+
+/*Ignored features:
+ * RECOVER - journaling in lwext4 is not supported
+ *           (probably won't be ever...)
+ * MMP - multi-mount protection (impossible scenario)
+ * BG_USE_META_CSUM - also masked below (reason not documented) */
+#define EXT_FINCOM_IGNORED ( \
+	EXT4_FINCOM_RECOVER | \
+	EXT4_FINCOM_MMP | \
+	EXT4_FINCOM_BG_USE_META_CSUM \
+)
+
+/*
+// TODO: Features incompatible to implement
+#define EXT4_SUPPORTED_FINCOM
+                     (EXT4_FINCOM_INLINE_DATA)
+
+// TODO: Features read only to implement
+#define EXT4_SUPPORTED_FRO_COM
+                     EXT4_FRO_COM_BIGALLOC |\
+                     EXT4_FRO_COM_QUOTA)
+*/
+
+
+/* Block group flag (see ext4_bgroup.flags): inode table/bitmap not in use */
+#define EXT4_BLOCK_GROUP_INODE_UNINIT 0x0001
+/* Block bitmap not in use */
+#define EXT4_BLOCK_GROUP_BLOCK_UNINIT 0x0002
+/* On-disk itable initialized to zero */
+#define EXT4_BLOCK_GROUP_ITABLE_ZEROED 0x0004
+
+#pragma pack on
+
+/*
+ * On-disk block group descriptor: 32 bytes, then 32 bytes of high halves.
+ */
+struct ext4_bgroup {
+	u32int block_bitmap_lo;	    /* Blocks bitmap block */
+	u32int inode_bitmap_lo;	    /* Inodes bitmap block */
+	u32int inode_table_first_block_lo; /* Inodes table block */
+	u16int free_blocks_count_lo;       /* Free blocks count */
+	u16int free_inodes_count_lo;       /* Free inodes count */
+	u16int used_dirs_count_lo;	 /* Directories count */
+	u16int flags;		       /* EXT4_BLOCK_GROUP_* flags (INODE_UNINIT, etc) */
+	u32int exclude_bitmap_lo;    /* Exclude bitmap for snapshots */
+	u16int block_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+bbitmap) LE */
+	u16int inode_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+ibitmap) LE */
+	u16int itable_unused_lo;     /* Unused inodes count */
+	u16int checksum;	     /* crc16(sb_uuid+group+desc) */
+
+	u32int block_bitmap_hi;	    /* Blocks bitmap block MSB */
+	u32int inode_bitmap_hi;	    /* I-nodes bitmap block MSB */
+	u32int inode_table_first_block_hi; /* I-nodes table block MSB */
+	u16int free_blocks_count_hi;       /* Free blocks count MSB */
+	u16int free_inodes_count_hi;       /* Free i-nodes count MSB */
+	u16int used_dirs_count_hi;	 /* Directories count MSB */
+	u16int itable_unused_hi;	   /* Unused inodes count MSB */
+	u32int exclude_bitmap_hi;	  /* Exclude bitmap block MSB */
+	u16int block_bitmap_csum_hi; /* crc32c(s_uuid+grp_num+bbitmap) BE */
+	u16int inode_bitmap_csum_hi; /* crc32c(s_uuid+grp_num+ibitmap) BE */
+	u32int reserved;	     /* Padding */
+};
+
+#pragma pack off
+
+#define EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE 32 /* first half of struct ext4_bgroup */
+#define EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE 64 /* full struct ext4_bgroup */
+
+#define EXT4_MIN_BLOCK_SIZE 1024  /* 1 KiB */
+#define EXT4_MAX_BLOCK_SIZE 65536 /* 64 KiB */
+#define EXT4_REV0_INODE_SIZE 128
+
+#define EXT4_INODE_BLOCK_SIZE 512
+
+#define EXT4_INODE_DIRECT_BLOCK_COUNT 12 /* indices 0..11 of ext4_inode.blocks[] */
+#define EXT4_INODE_INDIRECT_BLOCK EXT4_INODE_DIRECT_BLOCK_COUNT /* index 12 */
+#define EXT4_INODE_DOUBLE_INDIRECT_BLOCK (EXT4_INODE_INDIRECT_BLOCK + 1) /* index 13 */
+#define EXT4_INODE_TRIPPLE_INDIRECT_BLOCK (EXT4_INODE_DOUBLE_INDIRECT_BLOCK + 1) /* index 14 */
+#define EXT4_INODE_BLOCKS (EXT4_INODE_TRIPPLE_INDIRECT_BLOCK + 1) /* 15: length of ext4_inode.blocks[] */
+#define EXT4_INODE_INDIRECT_BLOCK_COUNT                                        \
+	(EXT4_INODE_BLOCKS - EXT4_INODE_DIRECT_BLOCK_COUNT)
+
+#pragma pack on
+
+/*
+ * On-disk inode; the fields through osd2 total 128 bytes when packed.
+ */
+struct ext4_inode {
+	u16int mode;		    /* File mode */
+	u16int uid;		    /* Low 16 bits of owner uid */
+	u32int size_lo;	   /* Size in bytes */
+	u32int access_time;       /* Access time */
+	u32int change_inode_time; /* I-node change time */
+	u32int modification_time; /* Modification time */
+	u32int deletion_time;     /* Deletion time */
+	u16int gid;		    /* Low 16 bits of group id */
+	u16int links_count;       /* Links count */
+	u32int blocks_count_lo;   /* Blocks count */
+	u32int flags;		    /* File flags */
+	u32int unused_osd1;       /* OS dependent - noted as unused upstream */
+	u32int blocks[EXT4_INODE_BLOCKS]; /* Pointers to blocks */
+	u32int generation;		    /* File version (for NFS) */
+	u32int file_acl_lo;		    /* File ACL */
+	u32int size_hi; /* presumably the high 32 bits paired with size_lo -- confirm */
+	u32int obso_faddr; /* Obsoleted fragment address */
+
+	union {
+		struct {
+			u16int blocks_high;
+			u16int file_acl_high;
+			u16int uid_high;
+			u16int gid_high;
+			u16int checksum_lo; /* crc32c(uuid+inum+inode) LE */
+			u16int reserved2;
+		} linux2;
+		struct {
+			u16int reserved1;
+			u16int mode_high;
+			u16int uid_high;
+			u16int gid_high;
+			u32int author;
+		} hurd2;
+	} osd2; /* both variants are 12 bytes */
+
+	u16int extra_isize;
+	u16int checksum_hi;	/* crc32c(uuid+inum+inode) BE */
+	u32int ctime_extra; /* Extra change time (nsec << 2 | epoch) */
+	u32int mtime_extra; /* Extra Modification time (nsec << 2 | epoch) */
+	u32int atime_extra; /* Extra Access time (nsec << 2 | epoch) */
+	u32int crtime;      /* File creation time */
+	u32int
+	    crtime_extra;    /* Extra file creation time (nsec << 2 | epoch) */
+	u32int version_hi; /* High 32 bits for 64-bit version */
+};
+
+#pragma pack off
+
+#define EXT4_INODE_MODE_FIFO 0x1000 /* file types; presumably held in ext4_inode.mode -- confirm */
+#define EXT4_INODE_MODE_CHARDEV 0x2000
+#define EXT4_INODE_MODE_DIRECTORY 0x4000
+#define EXT4_INODE_MODE_BLOCKDEV 0x6000
+#define EXT4_INODE_MODE_FILE 0x8000
+#define EXT4_INODE_MODE_SOFTLINK 0xA000
+#define EXT4_INODE_MODE_SOCKET 0xC000
+#define EXT4_INODE_MODE_TYPE_MASK 0xF000 /* covers every type value above */
+
+/*
+ * Inode flags (presumably bits of ext4_inode.flags -- confirm)
+ */
+#define EXT4_INODE_FLAG_SECRM 0x00000001     /* Secure deletion */
+#define EXT4_INODE_FLAG_UNRM 0x00000002      /* Undelete */
+#define EXT4_INODE_FLAG_COMPR 0x00000004     /* Compress file */
+#define EXT4_INODE_FLAG_SYNC 0x00000008      /* Synchronous updates */
+#define EXT4_INODE_FLAG_IMMUTABLE 0x00000010 /* Immutable file */
+#define EXT4_INODE_FLAG_APPEND 0x00000020  /* writes to file may only append */
+#define EXT4_INODE_FLAG_NODUMP 0x00000040  /* do not dump file */
+#define EXT4_INODE_FLAG_NOATIME 0x00000080 /* do not update atime */
+
+/* Compression flags */
+#define EXT4_INODE_FLAG_DIRTY 0x00000100
+#define EXT4_INODE_FLAG_COMPRBLK                                               \
+	0x00000200			   /* One or more compressed clusters */
+#define EXT4_INODE_FLAG_NOCOMPR 0x00000400 /* Don't compress */
+#define EXT4_INODE_FLAG_ECOMPR 0x00000800  /* Compression error */
+
+#define EXT4_INODE_FLAG_INDEX 0x00001000  /* hash-indexed directory */
+#define EXT4_INODE_FLAG_IMAGIC 0x00002000 /* AFS directory */
+#define EXT4_INODE_FLAG_JOURNAL_DATA                                           \
+	0x00004000			  /* File data should be journaled */
+#define EXT4_INODE_FLAG_NOTAIL 0x00008000 /* File tail should not be merged */
+#define EXT4_INODE_FLAG_DIRSYNC                                                \
+	0x00010000 /* Dirsync behaviour (directories only) */
+#define EXT4_INODE_FLAG_TOPDIR 0x00020000    /* Top of directory hierarchies */
+#define EXT4_INODE_FLAG_HUGE_FILE 0x00040000 /* Set to each huge file */
+#define EXT4_INODE_FLAG_EXTENTS 0x00080000   /* Inode uses extents */
+#define EXT4_INODE_FLAG_EA_INODE 0x00200000  /* Inode used for large EA */
+#define EXT4_INODE_FLAG_EOFBLOCKS 0x00400000 /* Blocks allocated beyond EOF */
+#define EXT4_INODE_FLAG_RESERVED 0x80000000  /* reserved for ext4 lib */
+
+#define EXT4_INODE_ROOT_INDEX 2 /* same value as EXT4_ROOT_INO */
+
+
+#define EXT4_DIRECTORY_FILENAME_LEN 255 /* presumably the maximum entry name length in bytes -- confirm */
+
+/**@brief   Directory entry types, numbered 0..7 (presumably stored in ext4_dir_en_internal.inode_type -- confirm). */
+enum { EXT4_DE_UNKNOWN = 0,
+       EXT4_DE_REG_FILE,
+       EXT4_DE_DIR,
+       EXT4_DE_CHRDEV,
+       EXT4_DE_BLKDEV,
+       EXT4_DE_FIFO,
+       EXT4_DE_SOCK,
+       EXT4_DE_SYMLINK };
+
+#define EXT4_DIRENTRY_DIR_CSUM 0xDE /* the fake file type noted on ext4_dir_entry_tail.reserved_ft */
+
+#pragma pack on
+
+union ext4_dir_en_internal { /* one byte, read either way depending on revision */
+	u8int name_length_high; /* Higher 8 bits of name length */
+	u8int inode_type;       /* Type of referenced inode (in rev >= 0.5) */
+};
+
+/**
+ * Linked list directory entry structure (8-byte header, then the name)
+ */
+struct ext4_dir_en {
+	u32int inode;	/* I-node for the entry */
+	u16int entry_len; /* Distance to the next directory entry */
+	u8int name_len;   /* Lower 8 bits of name length */
+
+	union ext4_dir_en_internal in;
+	u8int name[]; /* Entry name (flexible array member) */
+};
+
+/* Structures for indexed directory (packed, on-disk) */
+
+struct ext4_dir_idx_climit {
+	u16int limit; /* presumably the entry capacity -- confirm */
+	u16int count; /* presumably the entries in use -- confirm */
+};
+
+struct ext4_dir_idx_dot_en { /* 12 bytes: entry header plus a 4-byte name */
+	u32int inode;
+	u16int entry_length;
+	u8int name_length;
+	u8int inode_type;
+	u8int name[4];
+};
+
+struct ext4_dir_idx_rinfo {
+	u32int reserved_zero;
+	u8int hash_version; /* presumably one of EXT2_HTREE_* -- confirm */
+	u8int info_length;
+	u8int indirect_levels;
+	u8int unused_flags;
+};
+
+struct ext4_dir_idx_entry { /* 8 bytes */
+	u32int hash;
+	u32int block;
+};
+
+struct ext4_dir_idx_root {
+	struct ext4_dir_idx_dot_en dots[2]; /* presumably the "." and ".." entries -- confirm */
+	struct ext4_dir_idx_rinfo info;
+	struct ext4_dir_idx_entry en[]; /* flexible array member */
+};
+
+struct ext4_fake_dir_entry { /* same four leading fields as ext4_dir_idx_dot_en */
+	u32int inode;
+	u16int entry_length;
+	u8int name_length;
+	u8int inode_type;
+};
+
+struct ext4_dir_idx_node {
+	struct ext4_fake_dir_entry fake;
+	struct ext4_dir_idx_entry entries[]; /* flexible array member */
+};
+
+/*
+ * This goes at the end of each htree block (8 bytes).
+ */
+struct ext4_dir_idx_tail {
+	u32int reserved;
+	u32int checksum;	/* crc32c(uuid+inum+dirblock) */
+};
+
+/*
+ * This is a bogus directory entry at the end of each leaf block that
+ * records checksums; it is 12 bytes, see EXT4_DIRENT_TAIL().
+ */
+struct ext4_dir_entry_tail {
+	u32int reserved_zero1;	/* Pretend to be unused */
+	u16int rec_len;		/* 12 */
+	u8int reserved_zero2;	/* Zero name length */
+	u8int reserved_ft;	/* 0xDE, fake file type */
+	u32int checksum;		/* crc32c(uuid+inum+dirblock) */
+};
+
+#pragma pack off
+
+#define EXT4_DIRENT_TAIL(block, blocksize) /* tail entry at the end of block */ \
+	((struct ext4_dir_entry_tail *)(((char *)(block)) + \
+		((blocksize) - sizeof(struct ext4_dir_entry_tail))))
+
+#define EXT4_ERR_BAD_DX_DIR (-25000) /* negative, file-specific error codes */
+#define EXT4_ERR_NOT_FOUND (-25001)
+
+#define EXT4_LINK_MAX 65000 /* presumably the ceiling for ext4_inode.links_count -- confirm */
+
+#define EXT4_BAD_INO 1 /* reserved inode numbers follow */
+#define EXT4_ROOT_INO 2
+#define EXT4_BOOT_LOADER_INO 5
+#define EXT4_UNDEL_DIR_INO 6
+#define EXT4_RESIZE_INO 7
+#define EXT4_JOURNAL_INO 8
+
+#define EXT4_GOOD_OLD_FIRST_INO 11 /* presumably the first non-reserved inode on old revisions -- confirm */
+
+#pragma pack on
+
+/*
+ * This is the extent tail on-disk structure (4 bytes).
+ * All other extent structures are 12 bytes long.  It turns out that
+ * block size % 12 >= 4 for at least all powers of 2 greater than 512, which
+ * covers all valid ext4 block sizes.  Therefore, this tail structure can be
+ * crammed into the end of the block without having to rebalance the tree.
+ */
+struct ext4_extent_tail
+{
+	u32int checksum;	/* crc32c(uuid+inum+extent_block) */
+};
+
+/*
+ * This is the extent on-disk structure (12 bytes).
+ * It's used at the bottom of the tree.
+ */
+struct ext4_extent {
+	u32int iblock;	/* First logical block extent covers */
+	u16int nblocks;	/* Number of blocks covered by extent */
+	u16int fblock_hi;	/* High 16 bits of physical block */
+	u32int fblock_lo;	/* Low 32 bits of physical block */
+};
+
+/*
+ * This is the index on-disk structure (12 bytes).
+ * It's used at all the levels except the bottom.
+ */
+struct ext4_extent_index {
+	u32int iblock; /* Index covers logical blocks from 'iblock' */
+
+	/**
+	 * Pointer to the physical block of the next level
+	 * (a leaf or another index could be there), split
+	 * into a low 32-bit part and a high 16-bit part.
+	 */
+	u32int fblock_lo;
+	u16int fblock_hi;
+	u16int padding;
+};
+
+/*
+ * Each block (leaves and indexes), even inode-stored, has this header.
+ */
+struct ext4_extent_header {
+	u16int magic; /* presumably EXT4_EXTENT_MAGIC -- confirm */
+	u16int nentries;	/* Number of valid entries */
+	u16int max_nentries;	/* Capacity of store in entries */
+	u16int depth;		/* Has tree real underlying blocks? */
+	u32int generation;	/* generation of the tree */
+};
+
+#pragma pack off
+
+#define EXT4_EXTENT_MAGIC 0xF30A /* presumably the expected ext4_extent_header.magic -- confirm */
+
+/******************************************************************************/
+
+/* EXT3 HTree directory indexing: hash versions (presumably for hash_version fields -- confirm) */
+#define EXT2_HTREE_LEGACY 0
+#define EXT2_HTREE_HALF_MD4 1
+#define EXT2_HTREE_TEA 2
+#define EXT2_HTREE_LEGACY_UNSIGNED 3
+#define EXT2_HTREE_HALF_MD4_UNSIGNED 4
+#define EXT2_HTREE_TEA_UNSIGNED 5
+
+#define EXT2_HTREE_EOF 0x7FFFFFFFUL
+
+#define EXT4_GOOD_OLD_INODE_SIZE	128 /* same value as EXT4_REV0_INODE_SIZE */
+
+/*****************************************************************************/
+
+/*
+ * JBD stores integers in big endian.
+ */
+
+#define JBD_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */
+
+/*
+ * Descriptor block types (presumably values of jbd_bhdr.blocktype -- confirm):
+ */
+
+#define JBD_DESCRIPTOR_BLOCK	1
+#define JBD_COMMIT_BLOCK	2
+#define JBD_SUPERBLOCK		3
+#define JBD_SUPERBLOCK_V2	4
+#define JBD_REVOKE_BLOCK	5
+
+#pragma pack on
+
+/*
+ * Standard header for all descriptor blocks (12 bytes):
+ */
+struct jbd_bhdr {
+	u32int		magic; /* presumably JBD_MAGIC_NUMBER -- confirm */
+	u32int		blocktype; /* tested by the JBD_HAS_*_FEATURE macros */
+	u32int		sequence;
+};
+
+#pragma pack off
+
+/*
+ * Checksum types.
+ */
+#define JBD_CRC32_CHKSUM   1
+#define JBD_MD5_CHKSUM     2
+#define JBD_SHA1_CHKSUM    3
+#define JBD_CRC32C_CHKSUM  4
+
+#define JBD_CRC32_CHKSUM_SIZE 4
+
+#define JBD_CHECKSUM_BYTES (32 / sizeof(u32int)) /* element count of jbd_commit_header.chksum[] (32 bytes of storage), not a byte count */
+
+#pragma pack on
+
+/*
+ * Commit block header for storing transactional checksums:
+ *
+ * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum*
+ * fields are used to store a checksum of the descriptor and data blocks.
+ *
+ * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum
+ * field is used to store crc32c(uuid+commit_block).  Each journal metadata
+ * block gets its own checksum, and data block checksums are stored in
+ * journal_block_tag (in the descriptor).  The other h_chksum* fields are
+ * not used.
+ *
+ * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses
+ * journal_block_tag3_t to store a full 32-bit checksum.  Everything else
+ * is the same as v2.
+ *
+ * Checksum v1, v2, and v3 are mutually exclusive features.
+ * (The h_chksum* names above are chksum_type/chksum_size/chksum[] here.)
+ */
+struct jbd_commit_header {
+	struct jbd_bhdr header;
+	u8int chksum_type;
+	u8int chksum_size;
+	u8int padding[2];
+	u32int		chksum[JBD_CHECKSUM_BYTES];
+	u64int		commit_sec;
+	u32int		commit_nsec;
+};
+
+/*
+ * The block tags: used to describe a single buffer in the journal
+ */
+struct jbd_block_tag3 {
+	u32int		blocknr;	/* The on-disk block number */
+	u32int		flags;	/* See JBD_FLAG_* */
+	u32int		blocknr_high; /* most-significant high 32bits. */
+	u32int		checksum;	/* crc32c(uuid+seq+block) */
+};
+
+struct jbd_block_tag {
+	u32int		blocknr;	/* The on-disk block number */
+	u16int		checksum;	/* truncated crc32c(uuid+seq+block) */
+	u16int		flags;	/* See JBD_FLAG_* */
+	u32int		blocknr_high; /* most-significant high 32bits. */
+};
+
+#pragma pack off
+
+/* Definitions for the journal tag flags word (jbd_block_tag*.flags): */
+#define JBD_FLAG_ESCAPE		1	/* on-disk block is escaped */
+#define JBD_FLAG_SAME_UUID	2	/* block has same uuid as previous */
+#define JBD_FLAG_DELETED	4	/* block deleted by this transaction */
+#define JBD_FLAG_LAST_TAG	8	/* last tag in this descriptor block */
+
+#pragma pack on
+
+/* Tail of descriptor block, for checksumming (4 bytes) */
+struct jbd_block_tail {
+	u32int	checksum;
+};
+
+/*
+ * The revoke descriptor: used on disk to describe a series of blocks to
+ * be revoked from the log
+ */
+struct jbd_revoke_header {
+	struct jbd_bhdr  header;
+	u32int	 count;	/* Count of bytes used in the block */
+};
+
+/* Tail of revoke block, for checksumming (4 bytes) */
+struct jbd_revoke_tail {
+	u32int		checksum;
+};
+
+#pragma pack off
+
+#define JBD_USERS_MAX 48 /* number of UUID slots in jbd_sb.users[] */
+#define JBD_USERS_SIZE (UUID_SIZE * JBD_USERS_MAX) /* 768 bytes */
+
+#pragma pack on
+
+/*
+ * The journal superblock (big-endian fields; hex markers = byte offsets).
+ */
+struct jbd_sb {
+/* 0x0000 */
+	struct jbd_bhdr header;
+
+/* 0x000C */
+	/* Static information describing the journal */
+	u32int	blocksize;		/* journal device blocksize */
+	u32int	maxlen;		/* total blocks in journal file */
+	u32int	first;		/* first block of log information */
+
+/* 0x0018 */
+	/* Dynamic information describing the current state of the log */
+	u32int	sequence;		/* first commit ID expected in log */
+	u32int	start;		/* blocknr of start of log */
+
+/* 0x0020 */
+	/* Error value, as set by journal_abort(). */
+	s32int 	error_val;
+
+/* 0x0024 */
+	/* Remaining fields are only valid in a version-2 superblock */
+	u32int	feature_compat; 	/* compatible feature set */
+	u32int	feature_incompat; 	/* incompatible feature set */
+	u32int	feature_ro_compat; 	/* readonly-compatible feature set */
+/* 0x0030 */
+	u8int 	uuid[UUID_SIZE];		/* 128-bit uuid for journal */
+
+/* 0x0040 */
+	u32int	nr_users;		/* Nr of filesystems sharing log */
+
+	u32int	dynsuper;		/* Blocknr of dynamic superblock copy*/
+
+/* 0x0048 */
+	u32int	max_transaction;	/* Limit of journal blocks per trans.*/
+	u32int	max_trandata;	/* Limit of data blocks per trans. */
+
+/* 0x0050 */
+	u8int 	checksum_type;	/* checksum type */
+	u8int 	padding2[3];
+	u32int	padding[42];	/* pads up to the checksum at 0x00FC */
+	u32int	checksum;		/* crc32c(superblock) */
+
+/* 0x0100 */
+	u8int 	users[JBD_USERS_SIZE];		/* ids of all fs'es sharing the log */
+
+/* 0x0400 */
+};
+
+#pragma pack off
+
+#define JBD_SUPERBLOCK_SIZE sizeof(struct jbd_sb)
+/* Feature tests on a struct jbd_sb pointer; mask is passed through to_be32() before the AND. */
+#define JBD_HAS_COMPAT_FEATURE(jsb,mask)					\
+	((jsb)->header.blocktype >= to_be32(2) &&				\
+	 ((jsb)->feature_compat & to_be32((mask)))) /* NOTE(review): ">=" against a to_be32() value looks byte-order dependent -- confirm */
+#define JBD_HAS_RO_COMPAT_FEATURE(jsb,mask)				\
+	((jsb)->header.blocktype >= to_be32(2) &&				\
+	 ((jsb)->feature_ro_compat & to_be32((mask))))
+#define JBD_HAS_INCOMPAT_FEATURE(jsb,mask)				\
+	((jsb)->header.blocktype >= to_be32(2) &&				\
+	 ((jsb)->feature_incompat & to_be32((mask))))
+
+#define JBD_FEATURE_COMPAT_CHECKSUM	0x00000001 /* mask for JBD_HAS_COMPAT_FEATURE() */
+
+#define JBD_FEATURE_INCOMPAT_REVOKE		0x00000001
+#define JBD_FEATURE_INCOMPAT_64BIT		0x00000002
+#define JBD_FEATURE_INCOMPAT_ASYNC_COMMIT	0x00000004
+#define JBD_FEATURE_INCOMPAT_CSUM_V2		0x00000008
+#define JBD_FEATURE_INCOMPAT_CSUM_V3		0x00000010
+
+/* Features known to this implementation: */
+#define JBD_KNOWN_COMPAT_FEATURES	0
+#define JBD_KNOWN_ROCOMPAT_FEATURES	0
+#define JBD_KNOWN_INCOMPAT_FEATURES	(JBD_FEATURE_INCOMPAT_REVOKE|\
+					 JBD_FEATURE_INCOMPAT_ASYNC_COMMIT|\
+					 JBD_FEATURE_INCOMPAT_64BIT|\
+					 JBD_FEATURE_INCOMPAT_CSUM_V2|\
+					 JBD_FEATURE_INCOMPAT_CSUM_V3)
+
+/*****************************************************************************/
+
+#define EXT4_CRC32_INIT (0xFFFFFFFFUL) /* presumably the initial CRC accumulator value -- confirm */
+
+/*****************************************************************************/
+
+#define ext4_malloc  malloc /* allocator names used by the ext4 code; plain aliases here */
+#define ext4_calloc  calloc
+#define ext4_realloc realloc
+#define ext4_free    free
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/queue.h
@@ -1,0 +1,612 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)queue.h	8.5 (Berkeley) 8/20/94
+ * $FreeBSD$
+ */
+
+#pragma once
+
+/*
+ * This file defines four types of data structures: singly-linked lists,
+ * singly-linked tail queues, lists and tail queues.
+ *
+ * A singly-linked list is headed by a single forward pointer. The elements
+ * are singly linked for minimum space and pointer manipulation overhead at
+ * the expense of O(n) removal for arbitrary elements. New elements can be
+ * added to the list after an existing element or at the head of the list.
+ * Elements being removed from the head of the list should use the explicit
+ * macro for this purpose for optimum efficiency. A singly-linked list may
+ * only be traversed in the forward direction.  Singly-linked lists are ideal
+ * for applications with large datasets and few or no removals or for
+ * implementing a LIFO queue.
+ *
+ * A singly-linked tail queue is headed by a pair of pointers, one to the
+ * head of the list and the other to the tail of the list. The elements are
+ * singly linked for minimum space and pointer manipulation overhead at the
+ * expense of O(n) removal for arbitrary elements. New elements can be added
+ * to the list after an existing element, at the head of the list, or at the
+ * end of the list. Elements being removed from the head of the tail queue
+ * should use the explicit macro for this purpose for optimum efficiency.
+ * A singly-linked tail queue may only be traversed in the forward direction.
+ * Singly-linked tail queues are ideal for applications with large datasets
+ * and few or no removals or for implementing a FIFO queue.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may be traversed in either direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ *
+ *
+ *				SLIST	LIST	STAILQ	TAILQ
+ * _HEAD			+	+	+	+
+ * _HEAD_INITIALIZER		+	+	+	+
+ * _ENTRY			+	+	+	+
+ * _INIT			+	+	+	+
+ * _EMPTY			+	+	+	+
+ * _FIRST			+	+	+	+
+ * _NEXT			+	+	+	+
+ * _PREV			-	+	-	+
+ * _LAST			-	-	+	+
+ * _FOREACH			+	+	+	+
+ * _FOREACH_FROM		+	+	+	+
+ * _FOREACH_SAFE		+	+	+	+
+ * _FOREACH_FROM_SAFE		+	+	+	+
+ * _FOREACH_REVERSE		-	-	-	+
+ * _FOREACH_REVERSE_FROM	-	-	-	+
+ * _FOREACH_REVERSE_SAFE	-	-	-	+
+ * _FOREACH_REVERSE_FROM_SAFE	-	-	-	+
+ * _INSERT_HEAD			+	+	+	+
+ * _INSERT_BEFORE		-	+	-	+
+ * _INSERT_AFTER		+	+	+	+
+ * _INSERT_TAIL			-	-	+	+
+ * _CONCAT			-	-	+	+
+ * _REMOVE_AFTER		+	-	+	-
+ * _REMOVE_HEAD			+	-	+	-
+ * _REMOVE			+	+	+	+
+ * _SWAP			+	+	+	+
+ *
+ */
+#define	QMD_TRACE_ELEM(elem)	/* debug/trace hooks: every macro in this group expands to nothing */
+#define	QMD_TRACE_HEAD(head)
+#define	QMD_SAVELINK(name, link)
+#define	TRACEBUF	/* empty: adds no member to TAILQ_HEAD / TAILQ_ENTRY */
+#define	TRACEBUF_INITIALIZER
+#define	TRASHIT(x)	/* empty: its argument is discarded unevaluated */
+
+/*
+ * Singly-linked List declarations.
+ */
+#define	SLIST_HEAD(name, type)	/* declare struct name, the head of a singly-linked list of struct type */	\
+struct name {								\
+	struct type *slh_first;	/* first element */			\
+}
+
+#define	SLIST_HEAD_INITIALIZER(head)	/* initializer for an empty head; the head argument is not used */	\
+	{ nil }
+
+#define	SLIST_ENTRY(type)	/* anonymous link struct to embed as a member of each element */	\
+struct {								\
+	struct type *sle_next;	/* next element */			\
+}
+
+/*
+ * Singly-linked List functions.
+ */
+#define	SLIST_EMPTY(head)	((head)->slh_first == nil)	/* true when the list has no elements */
+
+#define	SLIST_FIRST(head)	((head)->slh_first)	/* first element or nil; also assigned to as an lvalue */
+
+#define	SLIST_FOREACH(var, head, field)	/* visit every element; advances through var's own link, so var must stay linked in the body */	\
+	for ((var) = SLIST_FIRST((head));				\
+	    (var);							\
+	    (var) = SLIST_NEXT((var), field))
+
+#define	SLIST_FOREACH_FROM(var, head, field)	/* as SLIST_FOREACH, but starts at var when var is non-nil */	\
+	for ((var) = ((var) ? (var) : SLIST_FIRST((head)));		\
+	    (var);							\
+	    (var) = SLIST_NEXT((var), field))
+
+#define	SLIST_FOREACH_SAFE(var, head, field, tvar)	/* successor is saved in tvar before the body runs, so var may be unlinked */	\
+	for ((var) = SLIST_FIRST((head));				\
+	    (var) && ((tvar) = SLIST_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	SLIST_FOREACH_FROM_SAFE(var, head, field, tvar)	/* SAFE variant that starts at var when var is non-nil */	\
+	for ((var) = ((var) ? (var) : SLIST_FIRST((head)));		\
+	    (var) && ((tvar) = SLIST_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	SLIST_FOREACH_PREVPTR(var, varp, head, field)	/* walk with varp addressing the link that points at var */	\
+	for ((varp) = &SLIST_FIRST((head));				\
+	    ((var) = *(varp)) != nil;					\
+	    (varp) = &SLIST_NEXT((var), field))
+
+#define	SLIST_INIT(head) do {	/* make head an empty list */		\
+	SLIST_FIRST((head)) = nil;					\
+} while (0)
+
+#define	SLIST_INSERT_AFTER(slistelm, elm, field) do {	/* link elm directly after slistelm */	\
+	SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field);	\
+	SLIST_NEXT((slistelm), field) = (elm);				\
+} while (0)
+
+#define	SLIST_INSERT_HEAD(head, elm, field) do {	/* push elm at the front */	\
+	SLIST_NEXT((elm), field) = SLIST_FIRST((head));			\
+	SLIST_FIRST((head)) = (elm);					\
+} while (0)
+
+#define	SLIST_NEXT(elm, field)	((elm)->field.sle_next)	/* successor of elm or nil; also assigned to as an lvalue */
+
+#define	SLIST_REMOVE(head, elm, type, field) do {	/* unlink elm; elm must be on the list, the search loop has no nil check */	\
+	QMD_SAVELINK(oldnext, (elm)->field.sle_next);			\
+	if (SLIST_FIRST((head)) == (elm)) {				\
+		SLIST_REMOVE_HEAD((head), field);			\
+	}								\
+	else {								\
+		struct type *curelm = SLIST_FIRST((head));		\
+		while (SLIST_NEXT(curelm, field) != (elm))		\
+			curelm = SLIST_NEXT(curelm, field);		\
+		SLIST_REMOVE_AFTER(curelm, field);			\
+	}								\
+	TRASHIT(*oldnext);						\
+} while (0)
+
+#define SLIST_REMOVE_AFTER(elm, field) do {	/* unlink the element after elm; elm must have a successor */	\
+	SLIST_NEXT(elm, field) =					\
+	    SLIST_NEXT(SLIST_NEXT(elm, field), field);			\
+} while (0)
+
+#define	SLIST_REMOVE_HEAD(head, field) do {	/* pop the first element; the list must not be empty */	\
+	SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field);	\
+} while (0)
+
+#define SLIST_SWAP(head1, head2, type) do {	/* exchange the contents of the two heads */	\
+	struct type *swap_first = SLIST_FIRST(head1);			\
+	SLIST_FIRST(head1) = SLIST_FIRST(head2);			\
+	SLIST_FIRST(head2) = swap_first;				\
+} while (0)
+
+/*
+ * Singly-linked Tail queue declarations.
+ */
+#define	STAILQ_HEAD(name, type)	/* declare struct name, the head of a singly-linked tail queue of struct type */	\
+struct name {								\
+	struct type *stqh_first;/* first element */			\
+	struct type **stqh_last;/* addr of last next element */		\
+}
+
+#define	STAILQ_HEAD_INITIALIZER(head)	/* empty queue: stqh_last addresses the head's own stqh_first */	\
+	{ nil, &(head).stqh_first }
+
+#define	STAILQ_ENTRY(type)	/* anonymous link struct to embed as a member of each element */	\
+struct {								\
+	struct type *stqe_next;	/* next element */			\
+}
+
+/*
+ * Singly-linked Tail queue functions.
+ */
+#define	STAILQ_CONCAT(head1, head2) do {	/* append all of head2 to head1 and leave head2 empty; no-op when head2 is empty */	\
+	if (!STAILQ_EMPTY((head2))) {					\
+		*(head1)->stqh_last = (head2)->stqh_first;		\
+		(head1)->stqh_last = (head2)->stqh_last;		\
+		STAILQ_INIT((head2));					\
+	}								\
+} while (0)
+
+#define	STAILQ_EMPTY(head)	((head)->stqh_first == nil)	/* true when the queue has no elements */
+
+#define	STAILQ_FIRST(head)	((head)->stqh_first)	/* first element or nil; also assigned to as an lvalue */
+
+#define	STAILQ_FOREACH(var, head, field)	/* visit every element; var must stay linked in the body */	\
+	for((var) = STAILQ_FIRST((head));				\
+	   (var);							\
+	   (var) = STAILQ_NEXT((var), field))
+
+#define	STAILQ_FOREACH_FROM(var, head, field)	/* as STAILQ_FOREACH, but starts at var when var is non-nil */	\
+	for ((var) = ((var) ? (var) : STAILQ_FIRST((head)));		\
+	   (var);							\
+	   (var) = STAILQ_NEXT((var), field))
+
+#define	STAILQ_FOREACH_SAFE(var, head, field, tvar)	/* successor is saved in tvar before the body runs, so var may be unlinked */	\
+	for ((var) = STAILQ_FIRST((head));				\
+	    (var) && ((tvar) = STAILQ_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	STAILQ_FOREACH_FROM_SAFE(var, head, field, tvar)	/* SAFE variant that starts at var when var is non-nil */	\
+	for ((var) = ((var) ? (var) : STAILQ_FIRST((head)));		\
+	    (var) && ((tvar) = STAILQ_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	STAILQ_INIT(head) do {	/* make head empty: no first element, stqh_last addresses stqh_first */	\
+	STAILQ_FIRST((head)) = nil;					\
+	(head)->stqh_last = &STAILQ_FIRST((head));			\
+} while (0)
+
+#define	STAILQ_INSERT_AFTER(head, tqelm, elm, field) do {	/* link elm after tqelm; moves stqh_last when elm becomes the last element */	\
+	if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == nil)\
+		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\
+	STAILQ_NEXT((tqelm), field) = (elm);				\
+} while (0)
+
+#define	STAILQ_INSERT_HEAD(head, elm, field) do {	/* push elm at the front; sets stqh_last when the queue was empty */	\
+	if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == nil)	\
+		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\
+	STAILQ_FIRST((head)) = (elm);					\
+} while (0)
+
+#define	STAILQ_INSERT_TAIL(head, elm, field) do {	/* append elm through stqh_last, without walking the queue */	\
+	STAILQ_NEXT((elm), field) = nil;				\
+	*(head)->stqh_last = (elm);					\
+	(head)->stqh_last = &STAILQ_NEXT((elm), field);			\
+} while (0)
+
+#define	STAILQ_LAST(head, type, field)	/* last element or nil; NOTE(review): __containerof is not defined in this header - confirm it is provided elsewhere */	\
+	(STAILQ_EMPTY((head)) ? nil :					\
+	    __containerof((head)->stqh_last, struct type, field.stqe_next))
+
+#define	STAILQ_NEXT(elm, field)	((elm)->field.stqe_next)	/* successor of elm or nil; also assigned to as an lvalue */
+
+#define	STAILQ_REMOVE(head, elm, type, field) do {	/* unlink elm; elm must be on the queue, the search loop has no nil check */	\
+	QMD_SAVELINK(oldnext, (elm)->field.stqe_next);			\
+	if (STAILQ_FIRST((head)) == (elm)) {				\
+		STAILQ_REMOVE_HEAD((head), field);			\
+	}								\
+	else {								\
+		struct type *curelm = STAILQ_FIRST((head));		\
+		while (STAILQ_NEXT(curelm, field) != (elm))		\
+			curelm = STAILQ_NEXT(curelm, field);		\
+		STAILQ_REMOVE_AFTER(head, curelm, field);		\
+	}								\
+	TRASHIT(*oldnext);						\
+} while (0)
+
+#define STAILQ_REMOVE_AFTER(head, elm, field) do {	/* unlink the element after elm (it must exist); fixes stqh_last if that was the last one */	\
+	if ((STAILQ_NEXT(elm, field) =					\
+	     STAILQ_NEXT(STAILQ_NEXT(elm, field), field)) == nil)	\
+		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\
+} while (0)
+
+#define	STAILQ_REMOVE_HEAD(head, field) do {	/* pop the first element (queue must not be empty); resets stqh_last when it becomes empty */	\
+	if ((STAILQ_FIRST((head)) =					\
+	     STAILQ_NEXT(STAILQ_FIRST((head)), field)) == nil)		\
+		(head)->stqh_last = &STAILQ_FIRST((head));		\
+} while (0)
+
+#define STAILQ_SWAP(head1, head2, type) do {	/* exchange two queues; an empty result gets stqh_last re-pointed at its own head */	\
+	struct type *swap_first = STAILQ_FIRST(head1);			\
+	struct type **swap_last = (head1)->stqh_last;			\
+	STAILQ_FIRST(head1) = STAILQ_FIRST(head2);			\
+	(head1)->stqh_last = (head2)->stqh_last;			\
+	STAILQ_FIRST(head2) = swap_first;				\
+	(head2)->stqh_last = swap_last;					\
+	if (STAILQ_EMPTY(head1))					\
+		(head1)->stqh_last = &STAILQ_FIRST(head1);		\
+	if (STAILQ_EMPTY(head2))					\
+		(head2)->stqh_last = &STAILQ_FIRST(head2);		\
+} while (0)
+
+
+/*
+ * List declarations.
+ */
+#define	LIST_HEAD(name, type)	/* declare struct name, the head of a doubly-linked list of struct type */	\
+struct name {								\
+	struct type *lh_first;	/* first element */			\
+}
+
+#define	LIST_HEAD_INITIALIZER(head)	/* initializer for an empty head; the head argument is not used */	\
+	{ nil }
+
+#define	LIST_ENTRY(type)	/* anonymous link struct to embed as a member of each element */	\
+struct {								\
+	struct type *le_next;	/* next element */			\
+	struct type **le_prev;	/* address of previous next element */	\
+}
+
+/*
+ * List functions.
+ */
+
+#define	QMD_LIST_CHECK_HEAD(head, field)	/* list consistency checks: all three expand to nothing */
+#define	QMD_LIST_CHECK_NEXT(elm, field)
+#define	QMD_LIST_CHECK_PREV(elm, field)
+
+#define	LIST_EMPTY(head)	((head)->lh_first == nil)	/* true when the list has no elements */
+
+#define	LIST_FIRST(head)	((head)->lh_first)	/* first element or nil; also assigned to as an lvalue */
+
+#define	LIST_FOREACH(var, head, field)	/* visit every element; var must stay linked in the body */	\
+	for ((var) = LIST_FIRST((head));				\
+	    (var);							\
+	    (var) = LIST_NEXT((var), field))
+
+#define	LIST_FOREACH_FROM(var, head, field)	/* as LIST_FOREACH, but starts at var when var is non-nil */	\
+	for ((var) = ((var) ? (var) : LIST_FIRST((head)));		\
+	    (var);							\
+	    (var) = LIST_NEXT((var), field))
+
+#define	LIST_FOREACH_SAFE(var, head, field, tvar)	/* successor is saved in tvar before the body runs, so var may be unlinked */	\
+	for ((var) = LIST_FIRST((head));				\
+	    (var) && ((tvar) = LIST_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	LIST_FOREACH_FROM_SAFE(var, head, field, tvar)	/* SAFE variant that starts at var when var is non-nil */	\
+	for ((var) = ((var) ? (var) : LIST_FIRST((head)));		\
+	    (var) && ((tvar) = LIST_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	LIST_INIT(head) do {	/* make head an empty list */		\
+	LIST_FIRST((head)) = nil;					\
+} while (0)
+
+#define	LIST_INSERT_AFTER(listelm, elm, field) do {	/* link elm directly after listelm, fixing the old successor's le_prev */	\
+	QMD_LIST_CHECK_NEXT(listelm, field);				\
+	if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != nil)\
+		LIST_NEXT((listelm), field)->field.le_prev =		\
+		    &LIST_NEXT((elm), field);				\
+	LIST_NEXT((listelm), field) = (elm);				\
+	(elm)->field.le_prev = &LIST_NEXT((listelm), field);		\
+} while (0)
+
+#define	LIST_INSERT_BEFORE(listelm, elm, field) do {	/* link elm directly before listelm, through listelm's le_prev (no head needed) */	\
+	QMD_LIST_CHECK_PREV(listelm, field);				\
+	(elm)->field.le_prev = (listelm)->field.le_prev;		\
+	LIST_NEXT((elm), field) = (listelm);				\
+	*(listelm)->field.le_prev = (elm);				\
+	(listelm)->field.le_prev = &LIST_NEXT((elm), field);		\
+} while (0)
+
+#define	LIST_INSERT_HEAD(head, elm, field) do {	/* push elm at the front; its le_prev addresses the head's lh_first */	\
+	QMD_LIST_CHECK_HEAD((head), field);				\
+	if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != nil)	\
+		LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\
+	LIST_FIRST((head)) = (elm);					\
+	(elm)->field.le_prev = &LIST_FIRST((head));			\
+} while (0)
+
+#define	LIST_NEXT(elm, field)	((elm)->field.le_next)	/* successor of elm or nil; also assigned to as an lvalue */
+
+#define	LIST_PREV(elm, head, type, field)	/* predecessor of elm, nil for the first element; NOTE(review): __containerof is not defined in this header - confirm it is provided elsewhere */	\
+	((elm)->field.le_prev == &LIST_FIRST((head)) ? nil :		\
+	    __containerof((elm)->field.le_prev, struct type, field.le_next))
+
+#define	LIST_REMOVE(elm, field) do {	/* unlink elm; needs no head because le_prev addresses the link that points at elm */	\
+	QMD_SAVELINK(oldnext, (elm)->field.le_next);			\
+	QMD_SAVELINK(oldprev, (elm)->field.le_prev);			\
+	QMD_LIST_CHECK_NEXT(elm, field);				\
+	QMD_LIST_CHECK_PREV(elm, field);				\
+	if (LIST_NEXT((elm), field) != nil)				\
+		LIST_NEXT((elm), field)->field.le_prev = 		\
+		    (elm)->field.le_prev;				\
+	*(elm)->field.le_prev = LIST_NEXT((elm), field);		\
+	TRASHIT(*oldnext);						\
+	TRASHIT(*oldprev);						\
+} while (0)
+
+#define LIST_SWAP(head1, head2, type, field) do {	/* exchange two lists and re-point each new first element's le_prev at its head */	\
+	struct type *swap_tmp = LIST_FIRST((head1));			\
+	LIST_FIRST((head1)) = LIST_FIRST((head2));			\
+	LIST_FIRST((head2)) = swap_tmp;					\
+	if ((swap_tmp = LIST_FIRST((head1))) != nil)			\
+		swap_tmp->field.le_prev = &LIST_FIRST((head1));		\
+	if ((swap_tmp = LIST_FIRST((head2))) != nil)			\
+		swap_tmp->field.le_prev = &LIST_FIRST((head2));		\
+} while (0)
+
+/*
+ * Tail queue declarations.
+ */
+#define	TAILQ_HEAD(name, type)	/* declare struct name, the head of a doubly-linked tail queue of struct type */	\
+struct name {								\
+	struct type *tqh_first;	/* first element */			\
+	struct type **tqh_last;	/* addr of last next element */		\
+	TRACEBUF							\
+}
+
+#define	TAILQ_HEAD_INITIALIZER(head)	/* empty queue: tqh_last addresses the head's own tqh_first */	\
+	{ nil, &(head).tqh_first, TRACEBUF_INITIALIZER }
+
+#define	TAILQ_ENTRY(type)	/* anonymous link struct to embed in each element; same two-pointer order as TAILQ_HEAD */	\
+struct {								\
+	struct type *tqe_next;	/* next element */			\
+	struct type **tqe_prev;	/* address of previous next element */	\
+	TRACEBUF							\
+}
+
+/*
+ * Tail queue functions.
+ */
+#define	QMD_TAILQ_CHECK_HEAD(head, field)	/* queue consistency checks: all four expand to nothing */
+#define	QMD_TAILQ_CHECK_TAIL(head, headname)
+#define	QMD_TAILQ_CHECK_NEXT(elm, field)
+#define	QMD_TAILQ_CHECK_PREV(elm, field)
+
+#define	TAILQ_CONCAT(head1, head2, field) do {	/* append all of head2 to head1 and leave head2 empty; no-op when head2 is empty */	\
+	if (!TAILQ_EMPTY(head2)) {					\
+		*(head1)->tqh_last = (head2)->tqh_first;		\
+		(head2)->tqh_first->field.tqe_prev = (head1)->tqh_last;	\
+		(head1)->tqh_last = (head2)->tqh_last;			\
+		TAILQ_INIT((head2));					\
+		QMD_TRACE_HEAD(head1);					\
+		QMD_TRACE_HEAD(head2);					\
+	}								\
+} while (0)
+
+#define	TAILQ_EMPTY(head)	((head)->tqh_first == nil)	/* true when the queue has no elements */
+
+#define	TAILQ_FIRST(head)	((head)->tqh_first)	/* first element or nil; also assigned to as an lvalue */
+
+#define	TAILQ_FOREACH(var, head, field)	/* visit every element front to back; var must stay linked in the body */	\
+	for ((var) = TAILQ_FIRST((head));				\
+	    (var);							\
+	    (var) = TAILQ_NEXT((var), field))
+
+#define	TAILQ_FOREACH_FROM(var, head, field)	/* as TAILQ_FOREACH, but starts at var when var is non-nil */	\
+	for ((var) = ((var) ? (var) : TAILQ_FIRST((head)));		\
+	    (var);							\
+	    (var) = TAILQ_NEXT((var), field))
+
+#define	TAILQ_FOREACH_SAFE(var, head, field, tvar)	/* successor is saved in tvar before the body runs, so var may be unlinked */	\
+	for ((var) = TAILQ_FIRST((head));				\
+	    (var) && ((tvar) = TAILQ_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar)	/* SAFE variant that starts at var when var is non-nil */	\
+	for ((var) = ((var) ? (var) : TAILQ_FIRST((head)));		\
+	    (var) && ((tvar) = TAILQ_NEXT((var), field), 1);		\
+	    (var) = (tvar))
+
+#define	TAILQ_FOREACH_REVERSE(var, head, headname, field)	/* visit every element back to front; headname is the TAILQ_HEAD struct tag */	\
+	for ((var) = TAILQ_LAST((head), headname);			\
+	    (var);							\
+	    (var) = TAILQ_PREV((var), headname, field))
+
+#define	TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field)	/* reverse walk that starts at var when var is non-nil */	\
+	for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname));	\
+	    (var);							\
+	    (var) = TAILQ_PREV((var), headname, field))
+
+#define	TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar)	/* reverse walk; predecessor is saved in tvar before the body runs */	\
+	for ((var) = TAILQ_LAST((head), headname);			\
+	    (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1);	\
+	    (var) = (tvar))
+
+#define	TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) /* reverse SAFE walk that starts at var when var is non-nil */ \
+	for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname));	\
+	    (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1);	\
+	    (var) = (tvar))
+
+#define	TAILQ_INIT(head) do {	/* make head empty: no first element, tqh_last addresses tqh_first */	\
+	TAILQ_FIRST((head)) = nil;					\
+	(head)->tqh_last = &TAILQ_FIRST((head));			\
+	QMD_TRACE_HEAD(head);						\
+} while (0)
+
+#define	TAILQ_INSERT_AFTER(head, listelm, elm, field) do {	/* link elm after listelm; moves tqh_last when elm becomes the last element */	\
+	QMD_TAILQ_CHECK_NEXT(listelm, field);				\
+	if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != nil)\
+		TAILQ_NEXT((elm), field)->field.tqe_prev = 		\
+		    &TAILQ_NEXT((elm), field);				\
+	else {								\
+		(head)->tqh_last = &TAILQ_NEXT((elm), field);		\
+		QMD_TRACE_HEAD(head);					\
+	}								\
+	TAILQ_NEXT((listelm), field) = (elm);				\
+	(elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field);		\
+	QMD_TRACE_ELEM(&(elm)->field);					\
+	QMD_TRACE_ELEM(&(listelm)->field);				\
+} while (0)
+
+#define	TAILQ_INSERT_BEFORE(listelm, elm, field) do {	/* link elm before listelm, through listelm's tqe_prev (no head needed) */	\
+	QMD_TAILQ_CHECK_PREV(listelm, field);				\
+	(elm)->field.tqe_prev = (listelm)->field.tqe_prev;		\
+	TAILQ_NEXT((elm), field) = (listelm);				\
+	*(listelm)->field.tqe_prev = (elm);				\
+	(listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field);		\
+	QMD_TRACE_ELEM(&(elm)->field);					\
+	QMD_TRACE_ELEM(&(listelm)->field);				\
+} while (0)
+
+#define	TAILQ_INSERT_HEAD(head, elm, field) do {	/* push elm at the front; sets tqh_last when the queue was empty */	\
+	QMD_TAILQ_CHECK_HEAD(head, field);				\
+	if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != nil)	\
+		TAILQ_FIRST((head))->field.tqe_prev =			\
+		    &TAILQ_NEXT((elm), field);				\
+	else								\
+		(head)->tqh_last = &TAILQ_NEXT((elm), field);		\
+	TAILQ_FIRST((head)) = (elm);					\
+	(elm)->field.tqe_prev = &TAILQ_FIRST((head));			\
+	QMD_TRACE_HEAD(head);						\
+	QMD_TRACE_ELEM(&(elm)->field);					\
+} while (0)
+
+#define	TAILQ_INSERT_TAIL(head, elm, field) do {	/* append elm through tqh_last, without walking the queue */	\
+	QMD_TAILQ_CHECK_TAIL(head, field);				\
+	TAILQ_NEXT((elm), field) = nil;				\
+	(elm)->field.tqe_prev = (head)->tqh_last;			\
+	*(head)->tqh_last = (elm);					\
+	(head)->tqh_last = &TAILQ_NEXT((elm), field);			\
+	QMD_TRACE_HEAD(head);						\
+	QMD_TRACE_ELEM(&(elm)->field);					\
+} while (0)
+
+#define	TAILQ_LAST(head, headname)	/* last element or nil; reads the link addressed by tqh_last as if it were a struct headname (relies on head and entry sharing a layout) */	\
+	(*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+#define	TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)	/* successor of elm or nil; also assigned to as an lvalue */
+
+#define	TAILQ_PREV(elm, headname, field)	/* predecessor of elm or nil; same struct headname cast as TAILQ_LAST, applied to tqe_prev */	\
+	(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
+#define	TAILQ_REMOVE(head, elm, field) do {	/* unlink elm without a search; moves tqh_last back when elm was the last element */	\
+	QMD_SAVELINK(oldnext, (elm)->field.tqe_next);			\
+	QMD_SAVELINK(oldprev, (elm)->field.tqe_prev);			\
+	QMD_TAILQ_CHECK_NEXT(elm, field);				\
+	QMD_TAILQ_CHECK_PREV(elm, field);				\
+	if ((TAILQ_NEXT((elm), field)) != nil)				\
+		TAILQ_NEXT((elm), field)->field.tqe_prev = 		\
+		    (elm)->field.tqe_prev;				\
+	else {								\
+		(head)->tqh_last = (elm)->field.tqe_prev;		\
+		QMD_TRACE_HEAD(head);					\
+	}								\
+	*(elm)->field.tqe_prev = TAILQ_NEXT((elm), field);		\
+	TRASHIT(*oldnext);						\
+	TRASHIT(*oldprev);						\
+	QMD_TRACE_ELEM(&(elm)->field);					\
+} while (0)
+
+#define TAILQ_SWAP(head1, head2, type, field) do {	/* exchange two queues, then re-point each first element's tqe_prev (or an empty tqh_last) at its new head */	\
+	struct type *swap_first = (head1)->tqh_first;			\
+	struct type **swap_last = (head1)->tqh_last;			\
+	(head1)->tqh_first = (head2)->tqh_first;			\
+	(head1)->tqh_last = (head2)->tqh_last;				\
+	(head2)->tqh_first = swap_first;				\
+	(head2)->tqh_last = swap_last;					\
+	if ((swap_first = (head1)->tqh_first) != nil)			\
+		swap_first->field.tqe_prev = &(head1)->tqh_first;	\
+	else								\
+		(head1)->tqh_last = &(head1)->tqh_first;		\
+	if ((swap_first = (head2)->tqh_first) != nil)			\
+		swap_first->field.tqe_prev = &(head2)->tqh_first;	\
+	else								\
+		(head2)->tqh_last = &(head2)->tqh_first;		\
+} while (0)
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/tree.h
@@ -1,0 +1,796 @@
+/*	$NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $	*/
+/*	$OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $	*/
+/* $FreeBSD$ */
+
+/*-
+ * Copyright 2002 Niels Provos <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure.  Every operation
+ * on the tree causes a splay to happen.  The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree.  On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n).  The
+ * amortized cost of each access to a splay tree is therefore O(lg n).
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute.  It fulfills a set of conditions:
+ *	- every search path from the root to a leaf consists of the
+ *	  same number of black nodes,
+ *	- each red node (except for the root) has a black parent,
+ *	- each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type)	/* declare struct name, the head of a splay tree of struct type */	\
+struct name {								\
+	struct type *sph_root; /* root of the tree */			\
+}
+
+#define SPLAY_INITIALIZER(root)	/* initializer for an empty tree; the root argument is not used */	\
+	{ nil }
+
+#define SPLAY_INIT(root) do {	/* make the tree empty */		\
+	(root)->sph_root = nil;					\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ENTRY(type)	/* anonymous link struct to embed as a member of each node */	\
+struct {								\
+	struct type *spe_left; /* left element */			\
+	struct type *spe_right; /* right element */			\
+}
+
+#define SPLAY_LEFT(elm, field)		(elm)->field.spe_left	/* child accessors; used as lvalues; expansion is not parenthesized */
+#define SPLAY_RIGHT(elm, field)		(elm)->field.spe_right
+#define SPLAY_ROOT(head)		(head)->sph_root
+#define SPLAY_EMPTY(head)		(SPLAY_ROOT(head) == nil)	/* true when the tree has no nodes */
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect tmp to hold SPLAY_{RIGHT,LEFT} of the root; tmp becomes the new root */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do {			\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field);	\
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
+	(head)->sph_root = tmp;						\
+} while (/*CONSTCOND*/ 0)
+	
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do {			\
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field);	\
+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
+	(head)->sph_root = tmp;						\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKLEFT(head, tmp, field) do {	/* hang the root on tmp's left, make it the new tmp, descend to its left child; tmp is assigned, so it must be an lvalue */	\
+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
+	tmp = (head)->sph_root;						\
+	(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);		\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) do {	/* mirror image of SPLAY_LINKLEFT: link on the right, descend right */	\
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
+	tmp = (head)->sph_root;						\
+	(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);	\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do {	/* hand the root's children to left/right, then adopt the chains collected under node as the root's children */	\
+	SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field);	\
+	SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field);	\
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field);	\
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions; cmp(a, b) is compared against 0 (<0, ==0, >0) to order nodes */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp)				\
+void name##_SPLAY(struct name *, struct type *);			\
+void name##_SPLAY_MINMAX(struct name *, int);				\
+struct type *name##_SPLAY_INSERT(struct name *, struct type *);		\
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *);		\
+									\
+/* Finds the node with the same key as elm */				\
+static __inline struct type *						\
+name##_SPLAY_FIND(struct name *head, struct type *elm)			\
+{									\
+	if (SPLAY_EMPTY(head))						\
+		return(nil);						\
+	name##_SPLAY(head, elm);	/* modifies the tree even for a lookup */	\
+	if ((cmp)(elm, (head)->sph_root) == 0)				\
+		return (head->sph_root);				\
+	return (nil);							\
+}									\
+									\
+static __inline struct type *	/* in-order successor of elm, or nil; does not check for an empty tree */	\
+name##_SPLAY_NEXT(struct name *head, struct type *elm)			\
+{									\
+	name##_SPLAY(head, elm);					\
+	if (SPLAY_RIGHT(elm, field) != nil) {				\
+		elm = SPLAY_RIGHT(elm, field);				\
+		while (SPLAY_LEFT(elm, field) != nil) {		\
+			elm = SPLAY_LEFT(elm, field);			\
+		}							\
+	} else								\
+		elm = nil;						\
+	return (elm);							\
+}									\
+									\
+static __inline struct type *	/* splays toward the direction given by val and returns the resulting root */	\
+name##_SPLAY_MIN_MAX(struct name *head, int val)			\
+{									\
+	name##_SPLAY_MINMAX(head, val);					\
+        return (SPLAY_ROOT(head));					\
+}
+
+/* Generates name##_SPLAY_INSERT, _SPLAY_REMOVE, name##_SPLAY and _SPLAY_MINMAX.
+ * name##_SPLAY is the main splay operation: it moves the node matching
+ * elm, or the last node reached while searching for it, to the root. */
+#define SPLAY_GENERATE(name, type, field, cmp)				\
+struct type *	/* returns nil when elm was inserted, or the existing equal node (elm is then not inserted) */	\
+name##_SPLAY_INSERT(struct name *head, struct type *elm)		\
+{									\
+    if (SPLAY_EMPTY(head)) {						\
+	    SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = nil;	\
+    } else {								\
+	    int __comp;							\
+	    name##_SPLAY(head, elm);					\
+	    __comp = (cmp)(elm, (head)->sph_root);			\
+	    if(__comp < 0) {						\
+		    SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+		    SPLAY_RIGHT(elm, field) = (head)->sph_root;		\
+		    SPLAY_LEFT((head)->sph_root, field) = nil;		\
+	    } else if (__comp > 0) {					\
+		    SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+		    SPLAY_LEFT(elm, field) = (head)->sph_root;		\
+		    SPLAY_RIGHT((head)->sph_root, field) = nil;	\
+	    } else							\
+		    return ((head)->sph_root);				\
+    }									\
+    (head)->sph_root = (elm);						\
+    return (nil);							\
+}									\
+									\
+struct type *	/* unlinks the node comparing equal to elm; returns elm itself on success, nil when no such node */	\
+name##_SPLAY_REMOVE(struct name *head, struct type *elm)		\
+{									\
+	struct type *__tmp;						\
+	if (SPLAY_EMPTY(head))						\
+		return (nil);						\
+	name##_SPLAY(head, elm);					\
+	if ((cmp)(elm, (head)->sph_root) == 0) {			\
+		if (SPLAY_LEFT((head)->sph_root, field) == nil) {	\
+			(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+		} else {						\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+			name##_SPLAY(head, elm);	/* brings the left subtree's largest node to the root */	\
+			SPLAY_RIGHT((head)->sph_root, field) = __tmp;	\
+		}							\
+		return (elm);						\
+	}								\
+	return (nil);							\
+}									\
+									\
+void	/* head must not be empty: the root is dereferenced unconditionally */	\
+name##_SPLAY(struct name *head, struct type *elm)			\
+{									\
+	struct type __node, *__left, *__right, *__tmp;			\
+	int __comp;							\
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = nil;\
+	__left = __right = &__node;					\
+\
+	while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) {		\
+		if (__comp < 0) {					\
+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
+			if (__tmp == nil)				\
+				break;					\
+			if ((cmp)(elm, __tmp) < 0){			\
+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
+				if (SPLAY_LEFT((head)->sph_root, field) == nil)\
+					break;				\
+			}						\
+			SPLAY_LINKLEFT(head, __right, field);		\
+		} else if (__comp > 0) {				\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			if (__tmp == nil)				\
+				break;					\
+			if ((cmp)(elm, __tmp) > 0){			\
+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
+				if (SPLAY_RIGHT((head)->sph_root, field) == nil)\
+					break;				\
+			}						\
+			SPLAY_LINKRIGHT(head, __left, field);		\
+		}							\
+	}								\
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
+}									\
+									\
+/* Splay with either the minimum or the maximum element			\
+ * Used to find minimum or maximum element in tree.			\
+ */									\
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{									\
+	struct type __node, *__left, *__right, *__tmp;			\
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = nil;\
+	__left = __right = &__node;					\
+\
+	while (__comp != 0) {	/* 0 names no direction: skip the walk (while (1) never terminated for 0) */	\
+		if (__comp < 0) {					\
+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
+			if (__tmp == nil)				\
+				break;					\
+			if (__comp < 0){				\
+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
+				if (SPLAY_LEFT((head)->sph_root, field) == nil)\
+					break;				\
+			}						\
+			SPLAY_LINKLEFT(head, __right, field);		\
+		} else if (__comp > 0) {				\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			if (__tmp == nil)				\
+				break;					\
+			if (__comp > 0) {				\
+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
+				if (SPLAY_RIGHT((head)->sph_root, field) == nil)\
+					break;				\
+			}						\
+			SPLAY_LINKRIGHT(head, __left, field);		\
+		}							\
+	}								\
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
+}
+
+#define SPLAY_NEGINF	-1	/* direction passed to name##_SPLAY_MIN_MAX by SPLAY_MIN */
+#define SPLAY_INF	1	/* direction passed to name##_SPLAY_MIN_MAX by SPLAY_MAX */
+
+#define SPLAY_INSERT(name, x, y)	name##_SPLAY_INSERT(x, y)	/* wrappers: name selects the generated function family */
+#define SPLAY_REMOVE(name, x, y)	name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y)		name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y)		name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x)		(SPLAY_EMPTY(x) ? nil	\
+					: name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x)		(SPLAY_EMPTY(x) ? nil	\
+					: name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head)	/* walk from SPLAY_MIN via SPLAY_NEXT until nil; every step calls the splay routine */	\
+	for ((x) = SPLAY_MIN(name, head);				\
+	     (x) != nil;						\
+	     (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type)						\
+struct name {								\
+	struct type *rbh_root; /* root of the tree */			\
+}
+
+#define RB_INITIALIZER(root)						\
+	{ nil }
+
+#define RB_INIT(root) do {						\
+	(root)->rbh_root = nil;					\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_BLACK	0
+#define RB_RED		1
+#define RB_ENTRY(type)							\
+struct {								\
+	struct type *rbe_left;		/* left element */		\
+	struct type *rbe_right;		/* right element */		\
+	struct type *rbe_parent;	/* parent element */		\
+	int rbe_color;			/* node color */		\
+}
+
+#define RB_LEFT(elm, field)		(elm)->field.rbe_left
+#define RB_RIGHT(elm, field)		(elm)->field.rbe_right
+#define RB_PARENT(elm, field)		(elm)->field.rbe_parent
+#define RB_COLOR(elm, field)		(elm)->field.rbe_color
+#define RB_ROOT(head)			(head)->rbh_root
+#define RB_EMPTY(head)			(RB_ROOT(head) == nil)
+
+#define RB_SET(elm, parent, field) do {					\
+	RB_PARENT(elm, field) = parent;					\
+	RB_LEFT(elm, field) = RB_RIGHT(elm, field) = nil;		\
+	RB_COLOR(elm, field) = RB_RED;					\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_SET_BLACKRED(black, red, field) do {				\
+	RB_COLOR(black, field) = RB_BLACK;				\
+	RB_COLOR(red, field) = RB_RED;					\
+} while (/*CONSTCOND*/ 0)
+
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x)	do {} while (0)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do {			\
+	(tmp) = RB_RIGHT(elm, field);					\
+	if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != nil) {	\
+		RB_PARENT(RB_LEFT(tmp, field), field) = (elm);		\
+	}								\
+	RB_AUGMENT(elm);						\
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != nil) {	\
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\
+		else							\
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\
+	} else								\
+		(head)->rbh_root = (tmp);				\
+	RB_LEFT(tmp, field) = (elm);					\
+	RB_PARENT(elm, field) = (tmp);					\
+	RB_AUGMENT(tmp);						\
+	if ((RB_PARENT(tmp, field)))					\
+		RB_AUGMENT(RB_PARENT(tmp, field));			\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do {			\
+	(tmp) = RB_LEFT(elm, field);					\
+	if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != nil) {	\
+		RB_PARENT(RB_RIGHT(tmp, field), field) = (elm);		\
+	}								\
+	RB_AUGMENT(elm);						\
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != nil) {	\
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\
+		else							\
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\
+	} else								\
+		(head)->rbh_root = (tmp);				\
+	RB_RIGHT(tmp, field) = (elm);					\
+	RB_PARENT(elm, field) = (tmp);					\
+	RB_AUGMENT(tmp);						\
+	if ((RB_PARENT(tmp, field)))					\
+		RB_AUGMENT(RB_PARENT(tmp, field));			\
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+#define	RB_PROTOTYPE(name, type, field, cmp)				\
+	RB_PROTOTYPE_INTERNAL(name, type, field, cmp,)
+#define	RB_PROTOTYPE_STATIC(name, type, field, cmp)			\
+	RB_PROTOTYPE_INTERNAL(name, type, field, cmp, static)
+#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr)		\
+	RB_PROTOTYPE_INSERT_COLOR(name, type, attr);			\
+	RB_PROTOTYPE_REMOVE_COLOR(name, type, attr);			\
+	RB_PROTOTYPE_INSERT(name, type, attr);				\
+	RB_PROTOTYPE_REMOVE(name, type, attr);				\
+	RB_PROTOTYPE_FIND(name, type, attr);				\
+	RB_PROTOTYPE_NFIND(name, type, attr);				\
+	RB_PROTOTYPE_NEXT(name, type, attr);				\
+	RB_PROTOTYPE_PREV(name, type, attr);				\
+	RB_PROTOTYPE_MINMAX(name, type, attr);
+#define RB_PROTOTYPE_INSERT_COLOR(name, type, attr)			\
+	attr void name##_RB_INSERT_COLOR(struct name *, struct type *)
+#define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr)			\
+	attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *)
+#define RB_PROTOTYPE_REMOVE(name, type, attr)				\
+	attr struct type *name##_RB_REMOVE(struct name *, struct type *)
+#define RB_PROTOTYPE_INSERT(name, type, attr)				\
+	attr struct type *name##_RB_INSERT(struct name *, struct type *)
+#define RB_PROTOTYPE_FIND(name, type, attr)				\
+	attr struct type *name##_RB_FIND(struct name *, struct type *)
+#define RB_PROTOTYPE_NFIND(name, type, attr)				\
+	attr struct type *name##_RB_NFIND(struct name *, struct type *)
+#define RB_PROTOTYPE_NEXT(name, type, attr)				\
+	attr struct type *name##_RB_NEXT(struct type *)
+#define RB_PROTOTYPE_PREV(name, type, attr)				\
+	attr struct type *name##_RB_PREV(struct type *)
+#define RB_PROTOTYPE_MINMAX(name, type, attr)				\
+	attr struct type *name##_RB_MINMAX(struct name *, int)
+
+/* Generates the red-black tree function definitions.
+ * RB_GENERATE_STATIC makes them file-local (static).
+ */
+#define	RB_GENERATE(name, type, field, cmp)				\
+	RB_GENERATE_INTERNAL(name, type, field, cmp,)
+#define	RB_GENERATE_STATIC(name, type, field, cmp)			\
+	RB_GENERATE_INTERNAL(name, type, field, cmp, static)
+#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr)		\
+	RB_GENERATE_INSERT_COLOR(name, type, field, attr)		\
+	RB_GENERATE_REMOVE_COLOR(name, type, field, attr)		\
+	RB_GENERATE_INSERT(name, type, field, cmp, attr)		\
+	RB_GENERATE_REMOVE(name, type, field, attr)			\
+	RB_GENERATE_FIND(name, type, field, cmp, attr)			\
+	RB_GENERATE_NFIND(name, type, field, cmp, attr)			\
+	RB_GENERATE_NEXT(name, type, field, attr)			\
+	RB_GENERATE_PREV(name, type, field, attr)			\
+	RB_GENERATE_MINMAX(name, type, field, attr)
+
+#define RB_GENERATE_INSERT_COLOR(name, type, field, attr)		\
+attr void								\
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm)		\
+{									\
+	struct type *parent, *gparent, *tmp;				\
+	while ((parent = RB_PARENT(elm, field)) != nil &&		\
+	    RB_COLOR(parent, field) == RB_RED) {			\
+		gparent = RB_PARENT(parent, field);			\
+		if (parent == RB_LEFT(gparent, field)) {		\
+			tmp = RB_RIGHT(gparent, field);			\
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	\
+				RB_COLOR(tmp, field) = RB_BLACK;	\
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent;				\
+				continue;				\
+			}						\
+			if (RB_RIGHT(parent, field) == elm) {		\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = parent;				\
+				parent = elm;				\
+				elm = tmp;				\
+			}						\
+			RB_SET_BLACKRED(parent, gparent, field);	\
+			RB_ROTATE_RIGHT(head, gparent, tmp, field);	\
+		} else {						\
+			tmp = RB_LEFT(gparent, field);			\
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	\
+				RB_COLOR(tmp, field) = RB_BLACK;	\
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent;				\
+				continue;				\
+			}						\
+			if (RB_LEFT(parent, field) == elm) {		\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = parent;				\
+				parent = elm;				\
+				elm = tmp;				\
+			}						\
+			RB_SET_BLACKRED(parent, gparent, field);	\
+			RB_ROTATE_LEFT(head, gparent, tmp, field);	\
+		}							\
+	}								\
+	RB_COLOR(head->rbh_root, field) = RB_BLACK;			\
+}
+
+#define RB_GENERATE_REMOVE_COLOR(name, type, field, attr)		\
+attr void								\
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{									\
+	struct type *tmp;						\
+	while ((elm == nil || RB_COLOR(elm, field) == RB_BLACK) &&	\
+	    elm != RB_ROOT(head) && parent != nil) {					\
+		if (RB_LEFT(parent, field) == elm) {			\
+			tmp = RB_RIGHT(parent, field);			\
+			if (RB_COLOR(tmp, field) == RB_RED) {		\
+				RB_SET_BLACKRED(tmp, parent, field);	\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = RB_RIGHT(parent, field);		\
+			}						\
+			if ((RB_LEFT(tmp, field) == nil ||		\
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == nil ||		\
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED;		\
+				elm = parent;				\
+				parent = RB_PARENT(elm, field);		\
+			} else {					\
+				if (RB_RIGHT(tmp, field) == nil ||	\
+				    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+					struct type *oleft;		\
+					if ((oleft = RB_LEFT(tmp, field)) \
+					    != nil)			\
+						RB_COLOR(oleft, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED;	\
+					RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+					tmp = RB_RIGHT(parent, field);	\
+				}					\
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK;	\
+				if (RB_RIGHT(tmp, field))		\
+					RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				elm = RB_ROOT(head);			\
+				break;					\
+			}						\
+		} else {						\
+			tmp = RB_LEFT(parent, field);			\
+			if (RB_COLOR(tmp, field) == RB_RED) {		\
+				RB_SET_BLACKRED(tmp, parent, field);	\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = RB_LEFT(parent, field);		\
+			}						\
+			if ((RB_LEFT(tmp, field) == nil ||		\
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == nil ||		\
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED;		\
+				elm = parent;				\
+				parent = RB_PARENT(elm, field);		\
+			} else {					\
+				if (RB_LEFT(tmp, field) == nil ||	\
+				    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+					struct type *oright;		\
+					if ((oright = RB_RIGHT(tmp, field)) \
+					    != nil)			\
+						RB_COLOR(oright, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED;	\
+					RB_ROTATE_LEFT(head, tmp, oright, field);\
+					tmp = RB_LEFT(parent, field);	\
+				}					\
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK;	\
+				if (RB_LEFT(tmp, field))		\
+					RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				elm = RB_ROOT(head);			\
+				break;					\
+			}						\
+		}							\
+	}								\
+	if (elm)							\
+		RB_COLOR(elm, field) = RB_BLACK;			\
+}
+
+#define RB_GENERATE_REMOVE(name, type, field, attr)			\
+attr struct type *							\
+name##_RB_REMOVE(struct name *head, struct type *elm)			\
+{									\
+	struct type *child, *parent, *old = elm;			\
+	int color;							\
+	if (RB_LEFT(elm, field) == nil)				\
+		child = RB_RIGHT(elm, field);				\
+	else if (RB_RIGHT(elm, field) == nil)				\
+		child = RB_LEFT(elm, field);				\
+	else {								\
+		struct type *left;					\
+		elm = RB_RIGHT(elm, field);				\
+		while ((left = RB_LEFT(elm, field)) != nil)		\
+			elm = left;					\
+		child = RB_RIGHT(elm, field);				\
+		parent = RB_PARENT(elm, field);				\
+		color = RB_COLOR(elm, field);				\
+		if (child)						\
+			RB_PARENT(child, field) = parent;		\
+		if (parent) {						\
+			if (RB_LEFT(parent, field) == elm)		\
+				RB_LEFT(parent, field) = child;		\
+			else						\
+				RB_RIGHT(parent, field) = child;	\
+			RB_AUGMENT(parent);				\
+		} else							\
+			RB_ROOT(head) = child;				\
+		if (RB_PARENT(elm, field) == old)			\
+			parent = elm;					\
+		(elm)->field = (old)->field;				\
+		if (RB_PARENT(old, field)) {				\
+			if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+				RB_LEFT(RB_PARENT(old, field), field) = elm;\
+			else						\
+				RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+			RB_AUGMENT(RB_PARENT(old, field));		\
+		} else							\
+			RB_ROOT(head) = elm;				\
+		RB_PARENT(RB_LEFT(old, field), field) = elm;		\
+		if (RB_RIGHT(old, field))				\
+			RB_PARENT(RB_RIGHT(old, field), field) = elm;	\
+		if (parent) {						\
+			left = parent;					\
+			do {						\
+				RB_AUGMENT(left);			\
+			} while ((left = RB_PARENT(left, field)) != nil); \
+		}							\
+		goto color;						\
+	}								\
+	parent = RB_PARENT(elm, field);					\
+	color = RB_COLOR(elm, field);					\
+	if (child)							\
+		RB_PARENT(child, field) = parent;			\
+	if (parent) {							\
+		if (RB_LEFT(parent, field) == elm)			\
+			RB_LEFT(parent, field) = child;			\
+		else							\
+			RB_RIGHT(parent, field) = child;		\
+		RB_AUGMENT(parent);					\
+	} else								\
+		RB_ROOT(head) = child;					\
+color:									\
+	if (color == RB_BLACK)						\
+		name##_RB_REMOVE_COLOR(head, parent, child);		\
+	return (old);							\
+}									\
+
+#define RB_GENERATE_INSERT(name, type, field, cmp, attr)		\
+/* Inserts elm; returns nil, or the node already holding an equal key (elm is then not inserted) */	\
+attr struct type *							\
+name##_RB_INSERT(struct name *head, struct type *elm)			\
+{									\
+	struct type *tmp;						\
+	struct type *parent = nil;					\
+	int comp = 0;							\
+	tmp = RB_ROOT(head);						\
+	while (tmp) {							\
+		parent = tmp;						\
+		comp = (cmp)(elm, parent);				\
+		if (comp < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else if (comp > 0)					\
+			tmp = RB_RIGHT(tmp, field);			\
+		else							\
+			return (tmp);					\
+	}								\
+	RB_SET(elm, parent, field);	/* links elm to parent, clears its children, colors it red */	\
+	if (parent != nil) {						\
+		if (comp < 0)						\
+			RB_LEFT(parent, field) = elm;			\
+		else							\
+			RB_RIGHT(parent, field) = elm;			\
+		RB_AUGMENT(parent);					\
+	} else								\
+		RB_ROOT(head) = elm;					\
+	name##_RB_INSERT_COLOR(head, elm);				\
+	return (nil);							\
+}
+
+#define RB_GENERATE_FIND(name, type, field, cmp, attr)			\
+/* Finds the node with the same key as elm; returns nil if there is none */	\
+attr struct type *							\
+name##_RB_FIND(struct name *head, struct type *elm)			\
+{									\
+	struct type *tmp = RB_ROOT(head);				\
+	int comp;							\
+	while (tmp) {							\
+		comp = cmp(elm, tmp);					\
+		if (comp < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else if (comp > 0)					\
+			tmp = RB_RIGHT(tmp, field);			\
+		else							\
+			return (tmp);					\
+	}								\
+	return (nil);							\
+}
+
+#define RB_GENERATE_NFIND(name, type, field, cmp, attr)			\
+/* Finds the first node greater than or equal to the search key; nil if there is none */	\
+attr struct type *							\
+name##_RB_NFIND(struct name *head, struct type *elm)			\
+{									\
+	struct type *tmp = RB_ROOT(head);				\
+	struct type *res = nil;					\
+	int comp;							\
+	while (tmp) {							\
+		comp = cmp(elm, tmp);					\
+		if (comp < 0) {						\
+			res = tmp;	/* closest node above elm seen so far */	\
+			tmp = RB_LEFT(tmp, field);			\
+		}							\
+		else if (comp > 0)					\
+			tmp = RB_RIGHT(tmp, field);			\
+		else							\
+			return (tmp);					\
+	}								\
+	return (res);							\
+}
+
+#define RB_GENERATE_NEXT(name, type, field, attr)			\
+/* ARGSUSED */								\
+attr struct type *							\
+name##_RB_NEXT(struct type *elm)					\
+{									\
+	if (RB_RIGHT(elm, field)) {					\
+		elm = RB_RIGHT(elm, field);				\
+		while (RB_LEFT(elm, field))				\
+			elm = RB_LEFT(elm, field);			\
+	} else {							\
+		if (RB_PARENT(elm, field) &&				\
+		    (elm == RB_LEFT(RB_PARENT(elm, field), field)))	\
+			elm = RB_PARENT(elm, field);			\
+		else {							\
+			while (RB_PARENT(elm, field) &&			\
+			    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+				elm = RB_PARENT(elm, field);		\
+			elm = RB_PARENT(elm, field);			\
+		}							\
+	}								\
+	return (elm);							\
+}
+
+#define RB_GENERATE_PREV(name, type, field, attr)			\
+/* ARGSUSED */								\
+attr struct type *							\
+name##_RB_PREV(struct type *elm)					\
+{									\
+	if (RB_LEFT(elm, field)) {					\
+		elm = RB_LEFT(elm, field);				\
+		while (RB_RIGHT(elm, field))				\
+			elm = RB_RIGHT(elm, field);			\
+	} else {							\
+		if (RB_PARENT(elm, field) &&				\
+		    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))	\
+			elm = RB_PARENT(elm, field);			\
+		else {							\
+			while (RB_PARENT(elm, field) &&			\
+			    (elm == RB_LEFT(RB_PARENT(elm, field), field)))\
+				elm = RB_PARENT(elm, field);		\
+			elm = RB_PARENT(elm, field);			\
+		}							\
+	}								\
+	return (elm);							\
+}
+
+#define RB_GENERATE_MINMAX(name, type, field, attr)			\
+attr struct type *							\
+name##_RB_MINMAX(struct name *head, int val)				\
+{									\
+	struct type *tmp = RB_ROOT(head);				\
+	struct type *parent = nil;					\
+	while (tmp) {							\
+		parent = tmp;						\
+		if (val < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else							\
+			tmp = RB_RIGHT(tmp, field);			\
+	}								\
+	return (parent);						\
+}
+
+#define RB_NEGINF	-1	/* name##_RB_MINMAX direction: follow left links to the minimum */
+#define RB_INF	1	/* name##_RB_MINMAX direction: follow right links to the maximum */
+
+#define RB_INSERT(name, x, y)	name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y)	name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y)	name##_RB_FIND(x, y)
+#define RB_NFIND(name, x, y)	name##_RB_NFIND(x, y)
+#define RB_NEXT(name, x, y)	name##_RB_NEXT(y)
+#define RB_PREV(name, x, y)	name##_RB_PREV(y)
+#define RB_MIN(name, x)		name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x)		name##_RB_MINMAX(x, RB_INF)
+
+#define RB_FOREACH(x, name, head)					\
+	for ((x) = RB_MIN(name, head);					\
+	     (x) != nil;						\
+	     (x) = name##_RB_NEXT(x))
+
+#define RB_FOREACH_FROM(x, name, y)					\
+	for ((x) = (y);							\
+	    ((x) != nil) && ((y) = name##_RB_NEXT(x), (x) != nil);	\
+	     (x) = (y))
+
+#define RB_FOREACH_SAFE(x, name, head, y)				\
+	for ((x) = RB_MIN(name, head);					\
+	    ((x) != nil) && ((y) = name##_RB_NEXT(x), (x) != nil);	\
+	     (x) = (y))
+
+#define RB_FOREACH_REVERSE(x, name, head)				\
+	for ((x) = RB_MAX(name, head);					\
+	     (x) != nil;						\
+	     (x) = name##_RB_PREV(x))
+
+#define RB_FOREACH_REVERSE_FROM(x, name, y)				\
+	for ((x) = (y);							\
+	    ((x) != nil) && ((y) = name##_RB_PREV(x), (x) != nil);	\
+	     (x) = (y))
+
+#define RB_FOREACH_REVERSE_SAFE(x, name, head, y)			\
+	for ((x) = RB_MAX(name, head);					\
+	    ((x) != nil) && ((y) = name##_RB_PREV(x), (x) != nil);	\
+	     (x) = (y))
--- /dev/null
+++ b/sys/src/cmd/ext4srv/mkfile
@@ -1,0 +1,61 @@
+</$objtype/mkfile
+
+TARG=ext4srv
+CFLAGS=$CFLAGS -D__${objtype}__ -p -Iinclude
+
+OFILES=\
+	ext4.$O\
+	ext4_balloc.$O\
+	ext4_bcache.$O\
+	ext4_bitmap.$O\
+	ext4_block_group.$O\
+	ext4_blockdev.$O\
+	ext4_crc32.$O\
+	ext4_debug.$O\
+	ext4_dir.$O\
+	ext4_dir_idx.$O\
+	ext4_extent.$O\
+	ext4_fs.$O\
+	ext4_hash.$O\
+	ext4_ialloc.$O\
+	ext4_inode.$O\
+	ext4_journal.$O\
+	ext4_mbr.$O\
+	ext4_mkfs.$O\
+	ext4_super.$O\
+	ext4_trans.$O\
+	ext4srv.$O\
+	group.$O\
+	part.$O\
+
+HFILES=\
+	common.h\
+	group.h\
+	include/ext4.h\
+	include/ext4_balloc.h\
+	include/ext4_bcache.h\
+	include/ext4_bitmap.h\
+	include/ext4_block_group.h\
+	include/ext4_blockdev.h\
+	include/ext4_config.h\
+	include/ext4_crc32.h\
+	include/ext4_debug.h\
+	include/ext4_dir.h\
+	include/ext4_dir_idx.h\
+	include/ext4_extent.h\
+	include/ext4_fs.h\
+	include/ext4_hash.h\
+	include/ext4_ialloc.h\
+	include/ext4_inode.h\
+	include/ext4_journal.h\
+	include/ext4_mbr.h\
+	include/ext4_misc.h\
+	include/ext4_mkfs.h\
+	include/ext4_super.h\
+	include/ext4_trans.h\
+	include/ext4_types.h\
+	include/queue.h\
+	include/tree.h\
+
+BIN=/$objtype/bin
+</sys/src/cmd/mkone
--- /dev/null
+++ b/sys/src/cmd/ext4srv/part.c
@@ -1,0 +1,454 @@
+#include "ext4_config.h"
+#include "ext4.h"
+#include <thread.h>
+#include "ext4_mkfs.h"
+#include "group.h"
+#include "common.h"
+
+#define TRACE(fmt, ...) //fprint(2, fmt, __VA_ARGS__)
+
+#define BDEV2PART(bdev) ((bdev)->bdif->p_user)	/* the Part that openpart stored in bdif.p_user */
+
+static struct {
+	QLock;		/* taken by every function here that reads or changes ps or id */
+	Part *ps;	/* head of the list of open partitions; openpart inserts at the front */
+	u32int id;	/* counter; openpart puts the next value in the top 32 bits of qidmask.path */
+}sv;
+
+/*
+ * Read up to n bytes from f at offset, calling pread again after
+ * short reads.  Returns the number of bytes read; if the very first
+ * pread returns <= 0, that value is returned instead.
+ */
+static long
+preadn(int f, void *av, long n, vlong offset)
+{
+	char *dst;
+	long got, total;
+
+	assert(offset >= 0);
+	dst = av;
+	for(total = 0; total < n; total += got){
+		got = pread(f, dst+total, n-total, offset+total);
+		if(got > 0)
+			continue;
+		/* EOF or error: report it only if nothing was read at all */
+		return total == 0 ? got : total;
+	}
+	return total;
+}
+
+/* Block device open hook: only traces; openpart has already opened the descriptor. */
+static int
+bdopen(struct ext4_blockdev *bdev)
+{
+	Part *part = BDEV2PART(bdev);
+
+	TRACE("bdopen %p\n", part);
+	USED(part);
+
+	return 0;
+}
+
+/* Block device read hook: read blkcnt blocks of ph_bsize bytes starting at block blkid into buf; 0 on success, -1 if fewer bytes were read. */
+static int
+bdread(struct ext4_blockdev *bdev, void *buf, u64int blkid, u32int blkcnt)
+{
+	Part *p;
+
+	p = BDEV2PART(bdev);
+	TRACE("bdread %p %p %llud %ud\n", p, buf, blkid, blkcnt);
+	if(preadn(p->f, buf, blkcnt*p->bdif.ph_bsize, blkid*p->bdif.ph_bsize) != blkcnt*p->bdif.ph_bsize)
+		return -1;
+	return 0;
+}
+
+/* Block device write hook: write blkcnt blocks of ph_bsize bytes from buf at block blkid with one pwrite; 0 on success, -1 on a failed or short write. */
+static int
+bdwrite(struct ext4_blockdev *bdev, const void *buf, u64int blkid, u32int blkcnt)
+{
+	Part *p;
+
+	p = BDEV2PART(bdev);
+	TRACE("bdwrite %p %p %llud %ud\n", p, buf, blkid, blkcnt);
+	if(pwrite(p->f, buf, blkcnt*p->bdif.ph_bsize, blkid*p->bdif.ph_bsize) != blkcnt*p->bdif.ph_bsize)
+		return -1;
+	return 0;
+}
+
+/* Block device close hook: only traces; the descriptor itself is closed by _closepart. */
+static int
+bdclose(struct ext4_blockdev *bdev)
+{
+	Part *part = BDEV2PART(bdev);
+
+	TRACE("bdclose %p\n", part);
+	USED(part);
+
+	return 0;
+}
+
+/* Set *blksz from the third field of the "geometry" line of the ctl file in
+ * dev's directory.  The 512 default is kept when ctl cannot be opened or has
+ * no such line.  Returns 0 on success, -1 on failure. */
+static int
+getblksz(char *dev, u32int *blksz)
+{
+	char *s, *e, *g, *a[5];
+	vlong x;
+	int f, n, r;
+
+	/* default blksz if couldn't find out the real one */
+	*blksz = 512;
+	f = -1;
+	g = nil;
+	if((s = smprint("%s_ctl", dev)) == nil)
+		goto error;
+	cleanname(s);
+	e = strrchr(s, '/');
+	e = e != nil ? e+1 : s;	/* no '/' in dev: ctl is in the current directory, not in / */
+	strcpy(e, "ctl");	/* the last element ends in "_ctl", so "ctl" always fits */
+	f = open(s, OREAD);
+	free(s);
+	if(f >= 0){
+		if((g = malloc(4096)) == nil)
+			goto error;
+		for(n = 0; (r = read(f, g+n, 4096-n-1)) > 0; n += r);
+		g[n] = 0;
+		close(f);
+		f = -1;
+		for(s = g; (e = strchr(s, '\n')) != nil; s = e+1){
+			*e = 0;
+			if(tokenize(s, a, nelem(a)) >= 3 && strcmp(a[0], "geometry") == 0){
+				x = strtoll(a[2], &e, 0);
+				if(x > 0 && *e == 0)
+					*blksz = x;
+				if(*blksz != x){
+					werrstr("invalid block size: %s", a[2]);
+					goto error;
+				}
+				break;
+			}
+		}
+	}
+	close(f);
+	free(g);
+	return 0;
+error:
+	close(f);
+	free(g);
+	return -1;
+}
+
+/* Format verb for a Part*: 'M' prints "/<qid.path>" (mount point stem), any other verb "dev<qid.path>" (device name). */
+static int
+fmtpart(Fmt *f)
+{
+	Part *part = va_arg(f->args, Part*);
+	char *layout = f->r == 'M' ? "/%#llux" : "dev%#llux";
+
+	return fmtprint(f, layout, part->qid.path);
+}
+
+/*
+ * Read the whole file at path, relative to the root of p's mount,
+ * into a malloc'd buffer that has room for one extra byte at the end.
+ * Returns the buffer and stores the number of bytes read in *sz;
+ * on failure returns nil and stores 0.  The caller frees the buffer.
+ */
+static void *
+readfile(Part *p, char *path, usize *sz)
+{
+	usize n, got, size;
+	char *s, *d;
+	ext4_file f;
+	int r;
+
+	*sz = 0;
+	while(*path == '/')
+		path++;
+	/* smprint can fail to allocate; do not hand nil to ext4_fopen2 */
+	if((s = smprint("%M/%s", p, path)) == nil)
+		return nil;
+	r = ext4_fopen2(&f, s, O_RDONLY);
+	free(s);
+	if(r != 0)
+		return nil;
+	size = ext4_fsize(&f);
+	if((d = malloc(size+1)) == nil){
+		ext4_fclose(&f);
+		return nil;
+	}
+	for(n = 0; n < size; n += got){
+		if(ext4_fread(&f, d+n, size-n, &got) < 0){
+			werrstr("readfile: %r");
+			ext4_fclose(&f);
+			free(d);
+			return nil;
+		}
+		if(got == 0)
+			break;
+	}
+	*sz = n;
+	ext4_fclose(&f);
+	return d;
+}
+
+/* Register, mount and prepare p's file system and load its groups; returns 0, or -1 with errstr set after undoing the steps that succeeded. */
+static int
+mountpart(Part *p, Opts *opts)
+{
+	usize sz;
+	char *gr;
+	int r;
+
+	r = 0;
+	if(snprint(p->dev, sizeof(p->dev), "%Ð", p) >= sizeof(p->dev)
+	|| snprint(p->mnt, sizeof(p->mnt), "%M/", p) >= sizeof(p->mnt)){
+		werrstr("mountpart: part path too long");
+		return -1;
+	}
+	if(ext4_device_register(&p->bdev, p->dev) < 0){
+		werrstr("mountpart: register: %r");
+		return -1;
+	}
+	if(ext4_mount(p->dev, p->mnt, opts->rdonly) < 0){
+		werrstr("mountpart: mount: %r");
+		goto unregister;
+	}
+	if(ext4_mount_setup_locks(p->mnt, &p->oslocks) < 0){
+		werrstr("mountpart: locks: %r");
+		goto umount;
+	}
+	if(ext4_recover(p->mnt) < 0){
+		werrstr("mountpart: recover: %r");
+		goto umount;
+	}
+	if(ext4_journal_start(p->mnt) < 0){
+		werrstr("mountpart: journal: %r");
+		goto umount;
+	}
+	if(opts->cachewb)
+		ext4_cache_write_back(p->mnt, 1);
+	if(ext4_get_sblock(p->mnt, &p->sb) < 0){
+		werrstr("mountpart: sblock: %r");
+		goto stopjournal;
+	}
+	if(opts->group != nil){
+		r = loadgroups(&p->groups, opts->group);
+	}else if((gr = readfile(p, "/etc/group", &sz)) != nil){
+		gr[sz] = 0;
+		r = loadgroups(&p->groups, gr);
+		free(gr);
+	}
+	if(r == 0)
+		return 0;
+	werrstr("mountpart: %r");
+stopjournal:	/* openpart frees p after a failure, so leave nothing mounted or registered */
+	ext4_cache_write_back(p->mnt, 0);	/* same teardown order as _closepart */
+	ext4_journal_stop(p->mnt);
+umount:
+	ext4_umount(p->mnt);
+unregister:
+	ext4_device_unregister(p->dev);
+	return -1;
+}
+
+/* Lock callback installed in oslocks by openpart; aux is presumably the Part stored in oslocks.p_user. */
+static void
+plock(void *aux)
+{
+	Part *part = aux;
+
+	qlock(part);
+}
+
+/* Unlock callback installed in oslocks by openpart; releases the QLock taken by plock. */
+static void
+punlock(void *aux)
+{
+	Part *part = aux;
+
+	qunlock(part);
+}
+
+/* Open and mount the file system on dev, creating it first if opts->fstype > 1.
+ * If dev is already open (same qid.path) the existing Part is returned;
+ * on failure returns nil with errstr set. */
+Part *
+openpart(char *dev, Opts *opts)
+{
+	struct ext4_mkfs_info info;
+	struct ext4_fs fs;
+	u32int blksz;
+	Part *p;
+	Dir *d;
+	int f;
+
+	d = nil;
+	p = nil;
+	qlock(&sv);
+
+	fmtinstall(L'Ð', fmtpart);
+	fmtinstall('M', fmtpart);
+
+	f = open(dev, ORDWR);
+	if(f < 0 || (d = dirfstat(f)) == nil)
+		goto error;
+	/* see if it's already opened */
+	for(p = sv.ps; p != nil && p->qid.path != d->qid.path; p = p->next);
+	if(p == nil){ /* no? then make one */
+		if(getblksz(dev, &blksz) != 0 || (p = calloc(1, sizeof(*p)+blksz+strlen(dev)+1)) == nil)
+			goto error;
+
+		p->f = f;
+		p->qid = d->qid;
+		p->bdev.bdif = &p->bdif;
+		p->bdev.part_size = d->length;
+		p->bdif.open = bdopen;
+		p->bdif.bread = bdread;
+		p->bdif.bwrite = bdwrite;
+		p->bdif.close = bdclose;
+		p->bdif.ph_bsize = blksz;
+		p->bdif.ph_bcnt = d->length/blksz;
+		p->bdif.ph_bbuf = p->blkbuf;
+		p->oslocks.lock = plock;
+		p->oslocks.unlock = punlock;
+		p->oslocks.p_user = p;
+		p->bdif.p_user = p;
+
+		p->partdev = (char*)(p+1) + blksz;
+		strcpy(p->partdev, dev);
+
+		if(opts->fstype > 1){
+			memset(&fs, 0, sizeof(fs));
+			memset(&info, 0, sizeof(info));
+			info.block_size = opts->blksz;
+			/* the label is data: never use it as the format string */
+			snprint(info.label, sizeof(info.label), "%s", opts->label);
+			info.inode_size = opts->inodesz;
+			info.inodes = opts->ninode;
+			info.journal = true;
+			if(ext4_mkfs(&fs, &p->bdev, &info, opts->fstype) < 0){
+				werrstr("mkfs: %r");
+				goto error;
+			}
+		}
+
+		if(mountpart(p, opts) != 0)
+			goto error;
+
+		p->next = sv.ps;
+		if(sv.ps != nil)
+			sv.ps->prev = p;
+		sv.ps = p;
+		p->qidmask.path = ((uvlong)sv.id++) << 32;
+		p->qidmask.type = QTDIR;
+	}else{
+		close(f);
+	}
+
+	free(d);
+	qunlock(&sv);
+
+	return p;
+
+error:
+	werrstr("openpart: %r");
+	if(f >= 0)
+		close(f);
+	free(d);
+	free(p);
+	qunlock(&sv);
+
+	return nil;
+}
+
+static void
+_closepart(Part *p)	/* unmount, unlink and free p; sv must be locked by the caller */
+{
+	ext4_cache_write_back(p->mnt, 0);
+	if(ext4_journal_stop(p->mnt) < 0)
+		fprint(2, "closepart: journal %s: %r\n", p->mnt);
+	if(ext4_umount(p->mnt) < 0)
+		fprint(2, "closepart: umount %s: %r\n", p->mnt);
+	if(ext4_device_unregister(p->dev) < 0)
+		fprint(2, "closepart: unregister %s: %r\n", p->dev);
+	close(p->f);
+	if(p->prev != nil)
+		p->prev->next = p->next;	/* the predecessor must skip p, which is freed below */
+	if(p->next != nil)
+		p->next->prev = p->prev;
+	if(p == sv.ps)
+		sv.ps = p->next;
+	freegroups(&p->groups);
+	free(p);
+}
+
+void
+closepart(Part *p)	/* run _closepart on p under sv's lock; p is freed and must not be used afterwards */
+{
+	qlock(&sv);
+	_closepart(p);
+	qunlock(&sv);
+}
+
+void
+closeallparts(void)	/* close every partition on the sv.ps list */
+{
+	qlock(&sv);
+	while(sv.ps != nil)	/* _closepart moves sv.ps on to the next entry */
+		_closepart(sv.ps);
+	qunlock(&sv);
+}
+
+void
+statallparts(void)	/* print inode, block and MB usage for every open partition */
+{
+	struct ext4_mount_stats s;
+	uvlong div;
+	Part *p;
+
+	qlock(&sv);
+	for(p = sv.ps; p != nil; p = p->next){
+		if(ext4_mount_point_stats(p->mnt, &s) < 0){
+			fprint(2, "%s: %r\n", p->partdev);	/* report on fd 2 and go on to the next partition */
+		}else{
+			print(
+				"%s (inodes) free %ud, used %ud, total %ud\n",
+				p->partdev,
+				s.free_inodes_count,
+				s.inodes_count-s.free_inodes_count,
+				s.inodes_count
+			);
+			print(
+				"%s (blocks) free %llud, used %llud, total %llud, each %ud\n",
+				p->partdev,
+				s.free_blocks_count,
+				s.blocks_count-s.free_blocks_count,
+				s.blocks_count, s.block_size
+			);
+			div = 1024/(s.block_size/1024);	/* blocks per MB; NOTE(review): divides by zero unless 1024 <= block_size <= 1MB */
+			print(
+				"%s (MB) free %llud, used %llud, total %llud\n",
+				p->partdev,
+				s.free_blocks_count/div,
+				(s.blocks_count-s.free_blocks_count)/div,
+				s.blocks_count/div
+			);
+		}
+	}
+	qunlock(&sv);
+}
+
+/* Call ext4_cache_flush on every open partition, reporting failures on fd 2. */
+void
+syncallparts(void)
+{
+	Part *part;
+	qlock(&sv);
+	for(part = sv.ps; part != nil; part = part->next)
+		if(ext4_cache_flush(part->mnt) < 0)
+			fprint(2, "%s: %r\n", part->partdev);
+	qunlock(&sv);
+}