ref: 10b25b512218fa7fc7b9239bf7f70b03bbd7f4ab
parent: f9230151dd405278cd78ec0e32ba166cc418ab9a
author: Sigrid Solveig Haflínudóttir <[email protected]>
date: Mon Feb 12 14:12:02 EST 2024
replace ext2srv with ext4srv
--- a/sys/man/4/ext2srv
+++ /dev/null
@@ -1,110 +1,0 @@
-.TH EXT2SRV 4
-.SH NAME
-ext2srv \- ext2 file system
-.SH SYNOPSIS
-.B ext2srv
-[
-.B -vrs
-] [
-.B -f
-.I file
-] [
-.B -p
-.I passwd
-] [
-.B -g
-.I group
-] [
-.I service
-]
-.SH DESCRIPTION
-.I Ext2srv
-is a file server that interprets the Linux Second Extended File System.
-A single instance of
-.I ext2srv
-can provide access to multiple ext2 partitions simultaneously.
-.PP
-.I Ext2srv
-posts a file descriptor named
-.I service
-(default
-.BR ext2 )
-in the
-.B /srv
-directory.
-To access an ext2 file system on a device, use
-.B mount
-with the
-.I spec
-argument
-(see
-.IR bind (1))
-the name of the file holding the raw ext2 file system, typically the disk or partition.
-If
-.I spec
-is undefined in the
-.BR mount ,
-.I ext2srv
-will use
-.I file
-as the default name for the device holding the file system.
-.PP
-Normally
-.I ext2srv
-creates a pipe to act as the communications channel between
-itself and its clients.
-The
-.B -s
-flag instructs
-.I ext2srv
-to use its standard input and output instead.
-This flag also prevents the creation of an explicit service file in
-.BR /srv .
-.PP
-The
-.B -v
-flag causes verbose output for debugging, while
-the
-.B -r
-flag (recommended) makes the file system read-only.
-The optional
-.B -p
-and
-.B -g
-flags specify Unix-format password (respectively group) files
-that give the mapping between the numeric user- and group-ID
-numbers in the ext2 file system and the strings reported by Plan 9 status
-inquiries.
-.PP
-There is no authentication or permission checking.
-Anyone who can access the ext2 file system will have full access
-to all its files, including write access if
-.I ext2srv
-is not started with the
-.B -r
-flag, irrespective of file ownership and permission flags.
-.PP
-Some file system state is cached in memory, and may
-be flushed only when the file system is unmounted.
-Therefore if
-.I ext2srv
-is stopped or the machine is rebooted while an ext2 file system
-is still mounted,
-the superblock on the device will have been marked `not valid'
-(unless the
-.B -r
-flag was used),
-and a
-.I fsck
-will be required before that file system may be mounted again.
-.SH BUGS
-There is no authentication or permission checking.
-The implementation has not tracked any changes to the ext2
-specification since it was written.
-There may be other bugs.
-It is advisable to use
-.I ext2srv
-in read-only mode whenever possible.
-.SH AUTHOR
-Bodet Laurent ([email protected]),
-with later updates by Russ Cox and Richard Miller.
--- /dev/null
+++ b/sys/man/4/ext4srv
@@ -1,0 +1,142 @@
+.TH EXT4SRV 4
+.SH NAME
+ext4srv \- ext4 file system
+.SH SYNOPSIS
+.B ext4srv
+[
+.B -Clrs
+] [
+.B -g
+.I groupfile
+] [
+.B -R
+.I uid
+] [
+.I service
+]
+.PP
+.B ext4srv
+.B -M
+.I (2|3|4)
+[
+.B -L
+.I label
+] [
+.B -b
+.I blksize
+] [
+.B -N
+.I numinodes
+] [
+.B -I
+.I inodesize
+]
+.I device
+.SH DESCRIPTION
+.I Ext4srv
+is a file server that interprets the Linux Second, Third and Fourth
+Extended File Systems.
+A single instance of
+.I ext4srv
+can provide access to multiple ext2, ext3 and ext4 partitions
+simultaneously.
+.PP
+.I Ext4srv
+posts a file descriptor named
+.I service
+(default
+.BR ext4 )
+in the
+.B /srv
+directory.
+To access an ext4 file system on a device, use
+.B mount
+with the
+.I spec
+argument
+(see
+.IR bind (1))
+the name of the file holding the raw ext4 file system, typically the disk or partition.
+If
+.I spec
+is undefined in the
+.BR mount ,
+.I ext4srv
+will use
+.I file
+as the default name for the device holding the file system.
+.PP
+Normally
+.I ext4srv
+creates a pipe to act as the communications channel between
+itself and its clients.
+The
+.B -s
+flag instructs
+.I ext4srv
+to use its standard input and output instead.
+This flag also prevents the creation of an explicit service file in
+.BR /srv .
+.PP
+The
+.B -r
+flag (recommended) makes the file system read-only.
+The optional
+.B -g
+flag specifies a Unix-format group file that gives the mapping between the
+numeric user- and group-ID numbers in the ext4 file system and the
+strings reported by Plan 9 status inquiries.
+.PP
+With the
+.B -R
+option the filesystem can be mounted in "root" mode, allowing full access regardless
+of permissions. The usual
+.I uid
+in this case is
+.IR 0 .
+.PP
+The optional flag
+.B -l
+enables symlink resolution; otherwise symlinks are hidden
+entirely, as Plan 9 does not have that concept.
+.PP
+Some file system state is cached in memory, and may
+be flushed only when the file system is unmounted if the
+.B -C
+flag is used, which enables the write-back cache.
+Therefore if
+.I ext4srv
+is stopped or the machine is rebooted while an ext4 file system is
+still mounted, the superblock on the device will have been marked `not
+valid'
+(unless the
+.B -r
+flag was used).
+.SH MKFS
+A different mode of
+.I ext4srv
+is enabled with the
+.B -M
+option, which accepts the file system version
+.RI ( 2
+for
+.I ext2
+and so on).
+In this mode a file system is initialized on the specified
+.I device
+and all existing data on it is destroyed.
+.PP
+Additional options may be specified, for example
+.B -L
+may be used to set the filesystem label.
+.SH BUGS
+Yes.
+.PP
+Permission checking is very basic and may not be complete.
+There may be many bugs.
+It is advisable to use
+.I ext4srv
+in read-only mode whenever possible.
+.SH HISTORY
+.I Ext4srv
+first appeared in 9front (February, 2024).
--- a/sys/src/cmd/aux/multi/mkfile
+++ b/sys/src/cmd/aux/multi/mkfile
@@ -33,7 +33,7 @@
dossrv\
echo\
ed\
- ext2srv\
+ ext4srv\
# fcp\
grep\
hget\
--- a/sys/src/cmd/ext2srv/chat.c
+++ /dev/null
@@ -1,53 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-#define SIZE 1024
-#define DOTDOT (&fmt+1)
-
-int chatty;
-
-void
-chat(char *fmt, ...)
-{
- char buf[SIZE], *out;
- va_list arg;
-
- if (!chatty)
- return;
-
- va_start(arg, fmt);
- out = vseprint(buf, buf+sizeof(buf), fmt, arg);
- va_end(arg);
- write(2, buf, (long)(out-buf));
-}
-
-void
-mchat(char *fmt, ...)
-{
- char buf[SIZE], *out;
- va_list arg;
-
- va_start(arg, fmt);
- out = vseprint(buf, buf+sizeof(buf), fmt, arg);
- va_end(arg);
- write(2, buf, (long)(out-buf));
-}
-void
-panic(char *fmt, ...)
-{
- char buf[SIZE];
- va_list arg;
- int n;
-
- n = sprint(buf, "%s %d: panic ", argv0, getpid());
- va_start(arg, fmt);
- vseprint(buf+n, buf+sizeof(buf)-n, fmt, arg);
- va_end(arg);
- fprint(2, "%s: %r\n", buf);
- exits("panic");
-}
--- a/sys/src/cmd/ext2srv/dat.h
+++ /dev/null
@@ -1,222 +1,0 @@
-typedef struct Xfs Xfs;
-typedef struct Xfile Xfile;
-typedef struct Iobuf Iobuf;
-typedef struct Ext2 Ext2;
-
-typedef struct SuperBlock SuperBlock;
-typedef struct GroupDesc GroupDesc;
-typedef struct Inode Inode;
-typedef struct DirEntry DirEntry;
-
-#define SECTORSIZE 512
-#define OFFSET_SUPER_BLOCK 1024
-
-#define EXT2_SUPER_MAGIC 0xEF53
-#define EXT2_MIN_BLOCK_SIZE 1024
-#define EXT2_MAX_BLOCK_SIZE 4096
-#define EXT2_ROOT_INODE 2
-#define EXT2_FIRST_INO 11
-#define EXT2_VALID_FS 0x0001
-#define EXT2_ERROR_FS 0x0002
-
-/*
- * Structure of the super block
- */
-struct SuperBlock {
- uint s_inodes_count; /* Inodes count */
- uint s_blocks_count; /* Blocks count */
- uint s_r_blocks_count; /* Reserved blocks count */
- uint s_free_blocks_count; /* Free blocks count */
- uint s_free_inodes_count; /* Free inodes count */
- uint s_first_data_block; /* First Data Block */
- uint s_log_block_size; /* Block size */
- int s_log_frag_size; /* Fragment size */
- uint s_blocks_per_group; /* # Blocks per group */
- uint s_frags_per_group; /* # Fragments per group */
- uint s_inodes_per_group; /* # Inodes per group */
- uint s_mtime; /* Mount time */
- uint s_wtime; /* Write time */
- ushort s_mnt_count; /* Mount count */
- short s_max_mnt_count; /* Maximal mount count */
- ushort s_magic; /* Magic signature */
- ushort s_state; /* File system state */
- ushort s_errors; /* Behaviour when detecting errors */
- ushort s_pad;
- uint s_lastcheck; /* time of last check */
- uint s_checkinterval; /* max. time between checks */
- uint s_creator_os; /* OS */
- uint s_rev_level; /* Revision level */
- ushort s_def_resuid; /* Default uid for reserved blocks */
- ushort s_def_resgid; /* Default gid for reserved blocks */
- uint s_reserved[235]; /* Padding to the end of the block */
-};
-
-/*
- * Structure of a blocks group descriptor
- */
-struct GroupDesc
-{
- uint bg_block_bitmap; /* Blocks bitmap block */
- uint bg_inode_bitmap; /* Inodes bitmap block */
- uint bg_inode_table; /* Inodes table block */
- ushort bg_free_blocks_count; /* Free blocks count */
- ushort bg_free_inodes_count; /* Free inodes count */
- ushort bg_used_dirs_count; /* Directories count */
- ushort bg_pad;
- uint bg_reserved[3];
-};
-
-/*
- * Constants relative to the data blocks
- */
-#define EXT2_NDIR_BLOCKS 12
-#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS
-#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1)
-#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1)
-#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1)
-
-/*
- * Structure of an inode on the disk
- */
-struct Inode {
- ushort i_mode; /* File mode */
- ushort i_uid; /* Owner Uid */
- uint i_size; /* Size in bytes */
- uint i_atime; /* Access time */
- uint i_ctime; /* Creation time */
- uint i_mtime; /* Modification time */
- uint i_dtime; /* Deletion Time */
- ushort i_gid; /* Group Id */
- ushort i_links_count; /* Links count */
- uint i_blocks; /* Blocks count */
- uint i_flags; /* File flags */
- uint osd1;
- uint i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
- uint i_version; /* File version (for NFS) */
- uint i_file_acl; /* File ACL */
- uint i_dir_acl; /* Directory ACL */
- uint i_faddr; /* Fragment address */
- uchar osd2[12];
-};
-
-/*
- * Structure of a directory entry
- */
-#define EXT2_NAME_LEN 255
-#define DIR_REC_LEN(name_len) (((name_len) + 8 + 3) & ~3)
-
-struct DirEntry {
- uint inode; /* Inode number */
- ushort rec_len; /* Directory entry length */
- uchar name_len; /* Name length */
- uchar reserved;
- char name[EXT2_NAME_LEN]; /* File name */
-};
-
-#define S_IFMT 00170000
-#define S_IFLNK 0120000
-#define S_IFREG 0100000
-#define S_IFDIR 0040000
-
-#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
-#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
-#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
-
-#define DEFAULT_UID 200
-#define DEFAULT_GID 100
-
-struct Iobuf
-{
- Xfs *dev;
- long addr;
- Iobuf *next;
- Iobuf *prev;
- Iobuf *hash;
- int busy;
- int dirty;
- char *iobuf;
-};
-
-struct Xfs{
- Xfs *next;
- char *name; /* of file containing external f.s. */
- Qid qid; /* of file containing external f.s. */
- long ref; /* attach count */
- Qid rootqid; /* of plan9 constructed root directory */
- short dev;
- short fmt;
- void *ptr;
-
- /* data from super block */
-
- int block_size;
- int desc_per_block;
- int inodes_per_group;
- int inodes_per_block;
- int addr_per_block;
- int blocks_per_group;
-
- int ngroups;
- int superaddr, superoff;
- int grpaddr;
-};
-
-struct Xfile{
- Xfile *next; /* in hash bucket */
- long client;
- long fid;
- Xfs * xf;
- void * ptr;
-
- uint inbr; /* inode nbr */
- uint pinbr; /* parrent inode */
- ulong bufaddr; /* addr of inode block */
- ulong bufoffset;
- int root; /* true on attach for ref count */
- int dirindex; /* next dir entry to read */
-};
-
-#define EXT2_SUPER 1
-#define EXT2_DESC 2
-#define EXT2_BBLOCK 3
-#define EXT2_BINODE 4
-
-struct Ext2{
- char type;
- union{
- SuperBlock *sb;
- GroupDesc *gd;
- char *bmp;
- }u;
- Iobuf *buf;
-};
-
-#define DESC_ADDR(xf,n) ( (xf)->grpaddr + ((n)/(xf)->desc_per_block) )
-#define DESC_OFFSET(xf,d,n) ( ((GroupDesc *)(d)) + ((n)%(xf)->desc_per_block) )
-
-enum{
- Asis, Clean, Clunk
-};
-
-enum{
- Enevermind,
- Eformat,
- Eio,
- Enomem,
- Enonexist,
- Eexist,
- Eperm,
- Enofilsys,
- Eauth,
- Enospace,
- Elink,
- Elongname,
- Eintern,
- Ecorrupt,
- Enotclean
-};
-
-extern int chatty;
-extern int errno;
-extern char *deffile;
-extern int rdonly;
--- a/sys/src/cmd/ext2srv/errstr.h
+++ /dev/null
@@ -1,17 +1,0 @@
-char *errmsg[] = {
- [Enevermind] "never mind",
- [Eformat] "unknown format",
- [Eio] "I/O error",
- [Enomem] "server out of memory",
- [Enonexist] "file does not exist",
- [Eexist] "file already exist",
- [Eperm] "permission denied",
- [Enofilsys] "no file system device specified",
- [Eauth] "authentication failed",
- [Enospace] "no space on device",
- [Elink] "write is only allowed in regular files",
- [Elongname] "name is too long",
- [Eintern] "internal Ext2 error",
- [Ecorrupt] "corrupt filesystem",
- [Enotclean] "fs not clean ... running e2fsck is recommended"
-};
--- a/sys/src/cmd/ext2srv/ext2fs.c
+++ /dev/null
@@ -1,348 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-#define thdr r->ifcall
-#define rhdr r->ofcall
-
-extern int errno;
-
-static void
-response(Req *r)
-{
- char *err;
-
- if (errno) {
- err = xerrstr(errno);
- chat("%s\n", err);
- respond(r, err);
- } else {
- chat("OK\n");
- respond(r, nil);
- }
-}
-
-static void
-rattach(Req *r)
-{
- Xfs *xf;
- Xfile *root;
-
- chat("attach(fid=%d,uname=\"%s\",aname=\"%s\",afid=\"%d\")...",
- thdr.fid, thdr.uname, thdr.aname, thdr.afid);
-
- errno = 0;
- root = xfile(r->fid, Clean);
- if(!root){
- errno = Enomem;
- goto error;
- }
- root->xf = xf = getxfs(thdr.aname);
- if(!xf)
- goto error;
-
- /* now attach root inode */
- if( get_inode(root, EXT2_ROOT_INODE) < 0 )
- goto error;
-
- r->fid->qid.type = QTDIR;
- r->fid->qid.vers = 0;
- root->xf->rootqid = r->fid->qid;
- root->pinbr = EXT2_ROOT_INODE;
- root->root = 1;
- rhdr.qid = r->fid->qid;
-
-error:
- response(r);
-}
-static char *
-rclone(Fid *fid, Fid *newfid)
-{
- Xfile *of = xfile(fid, Asis);
- Xfile *nf = xfile(newfid, Clean);
-
- chat("clone(fid=%d,newfid=%d)...", fid->fid, newfid->fid);
- errno = 0;
- if(!of)
- errno = Eio;
- else if(!nf)
- errno = Enomem;
- else{
- Xfile *next = nf->next;
- *nf = *of;
- nf->next = next;
- nf->fid = newfid->fid;
- nf->root = 0;
- }
- chat("%s\n", errno? xerrstr(errno) : "OK");
- return errno ? xerrstr(errno) : 0;
-}
-static char *
-rwalk1(Fid *fid, char *name, Qid *qid)
-{
- Xfile *f=xfile(fid, Asis);
- int nr, sinbr = 0;
-
- chat("walk1(fid=%d,name=\"%s\")...", fid->fid, name);
- errno = 0;
- if( !f ){
- chat("no xfile...");
- goto error;
- }
- if( !(fid->qid.type & QTDIR) ){
- chat("qid.type=0x%x...", fid->qid.type);
- goto error;
- }
- sinbr = f->pinbr;
- if( name == 0 || name[0] == 0 || !strcmp(name, ".") ){
- *qid = fid->qid;
- goto ok;
- }else if( !strcmp(name, "..") ){
- if( fid->qid.path == f->xf->rootqid.path ){
- chat("walkup from root...");
- *qid = fid->qid;
- goto ok;
- }
- if( get_inode(f, f->pinbr) < 0 )
- goto error;
- if( f->pinbr == EXT2_ROOT_INODE ){
- *qid = f->xf->rootqid;
- f->pinbr = EXT2_ROOT_INODE;
- } else {
- *qid = (Qid){f->pinbr,0,QTDIR};
- f->inbr = f->pinbr;
- if( (nr = get_file(f, "..")) < 0 )
- goto error;
- f->pinbr = nr;
- }
- }else{
- f->pinbr = f->inbr;
- if( (nr = get_file(f, name)) < 0 )
- goto error;
- if( get_inode(f, nr) < 0 )
- goto error;
- *qid = (Qid){nr,0,0};
- if( nr == EXT2_ROOT_INODE )
- *qid = f->xf->rootqid;
- else if( S_ISDIR(getmode(f)) )
- qid->type = QTDIR;
- /*strcpy(f->name, thdr.name);*/
- }
-ok:
- chat("OK\n");
- return 0;
-error:
- f->pinbr = sinbr;
- chat("%s\n", xerrstr(Enonexist));
- return xerrstr(Enonexist);
-}
-static void
-rstat(Req *r)
-{
- Xfile *f=xfile(r->fid, Asis);
-
- chat("stat(fid=%d)...", thdr.fid);
- errno = 0;
- if( !f )
- errno = Eio;
- else{
- dostat(r->fid->qid, f, &r->d);
- }
- response(r);
-}
-static void
-rwstat(Req *r)
-{
- Xfile *f=xfile(r->fid, Asis);
-
- chat("wstat(fid=%d)...", thdr.fid);
- errno = 0;
- if( !f )
- errno = Eio;
- else
- dowstat(f, &r->d);
- response(r);
-}
-static void
-rread(Req *r)
-{
- Xfile *f;
- int nr;
-
- chat("read(fid=%d,offset=%lld,count=%d)...",
- thdr.fid, thdr.offset, thdr.count);
- errno = 0;
- if ( !(f=xfile(r->fid, Asis)) )
- goto error;
- if( r->fid->qid.type & QTDIR ){
- nr = readdir(f, r->rbuf, thdr.offset, thdr.count);
- }else
- nr = readfile(f, r->rbuf, thdr.offset, thdr.count);
-
- if(nr >= 0){
- rhdr.count = nr;
- chat("rcnt=%d...OK\n", nr);
- respond(r, nil);
- return;
- }
-error:
- errno = Eio;
- response(r);
-}
-static void
-rwrite(Req *r)
-{
- Xfile *f; int nr;
-
- chat("write(fid=%d,offset=%lld,count=%d)...",
- thdr.fid, thdr.offset, thdr.count);
-
- errno = 0;
- if (!(f=xfile(r->fid, Asis)) ){
- errno = Eio;
- goto error;
- }
- if( !S_ISREG(getmode(f)) ){
- errno = Elink;
- goto error;
- }
- nr = writefile(f, thdr.data, thdr.offset, thdr.count);
- if(nr >= 0){
- rhdr.count = nr;
- chat("rcnt=%d...OK\n", nr);
- respond(r, nil);
- return;
- }
- errno = Eio;
-error:
- response(r);
-}
-static void
-destroyfid(Fid *fid)
-{
- chat("destroy(fid=%d)\n", fid->fid);
- xfile(fid, Clunk);
- /*syncbuf(xf);*/
-}
-static void
-ropen(Req *r)
-{
- Xfile *f;
-
- chat("open(fid=%d,mode=%d)...", thdr.fid, thdr.mode);
-
- errno = 0;
- f = xfile(r->fid, Asis);
- if( !f ){
- errno = Eio;
- goto error;
- }
-
- if(thdr.mode & OTRUNC){
- if( !S_ISREG(getmode(f)) ){
- errno = Eperm;
- goto error;
- }
- if(truncfile(f) < 0){
- goto error;
- }
- }
- chat("f->qid=0x%8.8lux...", r->fid->qid.path);
- rhdr.qid = r->fid->qid;
-error:
- response(r);
-}
-static void
-rcreate(Req *r)
-{
- Xfile *f;
- int inr, perm;
-
- chat("create(fid=%d,name=\"%s\",perm=%uo,mode=%d)...",
- thdr.fid, thdr.name, thdr.perm, thdr.mode);
-
- errno = 0;
- if(strcmp(thdr.name, ".") == 0 || strcmp(thdr.name, "..") == 0){
- errno = Eperm;
- goto error;
- }
- f = xfile(r->fid, Asis);
- if( !f ){
- errno = Eio;
- goto error;
- }
- if( strlen(thdr.name) > EXT2_NAME_LEN ){
- chat("name too long ...");
- errno = Elongname;
- goto error;
- }
-
- /* create */
- errno = 0;
- if( thdr.perm & DMDIR ){
- perm = (thdr.perm & ~0777) |
- (getmode(f) & thdr.perm & 0777);
- perm |= S_IFDIR;
- inr = create_dir(f, thdr.name, perm);
- }else{
- perm = (thdr.perm & (~0777|0111)) |
- (getmode(f) & thdr.perm & 0666);
- perm |= S_IFREG;
- inr = create_file(f, thdr.name, perm);
-
- }
- if( inr < 0 )
- goto error;
-
- /* fill with new inode */
- f->pinbr = f->inbr;
- if( get_inode(f, inr) < 0 ){
- errno = Eio;
- goto error;
- }
- r->fid->qid = (Qid){inr, 0, 0};
- if( S_ISDIR(getmode(f)) )
- r->fid->qid.type |= QTDIR;
- chat("f->qid=0x%8.8lux...", r->fid->qid.path);
- rhdr.qid = r->fid->qid;
-error:
- response(r);
-}
-static void
-rremove(Req *r)
-{
- Xfile *f=xfile(r->fid, Asis);
-
- chat("remove(fid=%d) ...", thdr.fid);
-
- errno = 0;
- if(!f){
- errno = Eio;
- goto error;
- }
-
- /* check permission here !!!!*/
-
- unlink(f);
-
-error:
- response(r);
-}
-
-Srv ext2srv = {
- .destroyfid = destroyfid,
- .attach = rattach,
- .stat = rstat,
- .wstat = rwstat,
- .clone = rclone,
- .walk1 = rwalk1,
- .open = ropen,
- .read = rread,
- .write = rwrite,
- .create = rcreate,
- .remove = rremove,
-};
--- a/sys/src/cmd/ext2srv/ext2srv.man
+++ /dev/null
@@ -1,110 +1,0 @@
-.TH EXT2SRV 4
-.SH NAME
-ext2srv \- ext2 file system
-.SH SYNOPSIS
-.B ext2srv
-[
-.B -vrs
-] [
-.B -f
-.I file
-] [
-.B -p
-.I passwd
-] [
-.B -g
-.I group
-] [
-.I service
-]
-.SH DESCRIPTION
-.I Ext2srv
-is a file server that interprets the Linux Second Extended File System.
-A single instance of
-.I ext2srv
-can provide access to multiple ext2 partitions simultaneously.
-.PP
-.I Ext2srv
-posts a file descriptor named
-.I service
-(default
-.BR ext2 )
-in the
-.B /srv
-directory.
-To access an ext2 file system on a device, use
-.B mount
-with the
-.I spec
-argument
-(see
-.IR bind (1))
-the name of the file holding the raw ext2 file system, typically the disk or partition.
-If
-.I spec
-is undefined in the
-.BR mount ,
-.I ext2srv
-will use
-.I file
-as the default name for the device holding the file system.
-.PP
-Normally
-.I ext2srv
-creates a pipe to act as the communications channel between
-itself and its clients.
-The
-.B -s
-flag instructs
-.I ext2srv
-to use its standard input and output instead.
-This flag also prevents the creation of an explicit service file in
-.BR /srv .
-.PP
-The
-.B -v
-flag causes verbose output for debugging, while
-the
-.B -r
-flag (recommended) makes the file system read-only.
-The optional
-.B -p
-and
-.B -g
-flags specify Unix-format password (respectively group) files
-that give the mapping between the numeric user- and group-ID
-numbers in the ext2 file system and the strings reported by Plan 9 status
-inquiries.
-.PP
-There is no authentication or permission checking.
-Anyone who can access the ext2 file system will have full access
-to all its files, including write access if
-.I ext2srv
-is not started with the
-.B -r
-flag, irrespective of file ownership and permission flags.
-.PP
-Some file system state is cached in memory, and may
-be flushed only when the file system is unmounted.
-Therefore if
-.I ext2srv
-is stopped or the machine is rebooted while an ext2 file system
-is still mounted,
-the superblock on the device will have been marked `not valid'
-(unless the
-.B -r
-flag was used),
-and a
-.I fsck
-will be required before that file system may be mounted again.
-.SH BUGS
-There is no authentication or permission checking.
-The implementation has not tracked any changes to the ext2
-specification since it was written.
-There may be other bugs.
-It is advisable to use
-.I ext2srv
-in read-only mode whenever possible.
-.SH AUTHOR
-Bodet Laurent ([email protected]),
-with later updates by Russ Cox and Richard Miller.
--- a/sys/src/cmd/ext2srv/ext2subs.c
+++ /dev/null
@@ -1,1870 +1,0 @@
-/*
- * ext2subs.c version 0.20
- *
- * Some strategic functions come from linux/fs/ext2
- * kernel sources written by Remy Card.
- *
-*/
-
-#include <u.h>
-#include <libc.h>
-#include <bio.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-#define putext2(e) putbuf((e).buf)
-#define dirtyext2(e) dirtybuf((e).buf)
-
-static Intmap *uidmap, *gidmap;
-
-static int
-getnum(char *s, int *n)
-{
- char *r;
-
- *n = strtol(s, &r, 10);
- return (r != s);
-}
-
-static Intmap*
-idfile(char *f)
-{
- Biobuf *bin;
- Intmap *map;
- char *fields[3];
- char *s;
- int nf, id;
-
- map = allocmap(0);
- bin = Bopen(f, OREAD);
- if (bin == 0)
- return 0;
- while ((s = Brdline(bin, '\n')) != 0) {
- s[Blinelen(bin)-1] = '\0';
- nf = getfields(s, fields, 3, 0, ":");
- if (nf == 3 && getnum(fields[2], &id))
- insertkey(map, id, strdup(fields[0]));
- }
- Bterm(bin);
- return map;
-}
-
-void
-uidfile(char *f)
-{
- uidmap = idfile(f);
-}
-
-void
-gidfile(char *f)
-{
- gidmap = idfile(f);
-}
-
-static char*
-mapuid(int id)
-{
- static char s[12];
- char *p;
-
- if (uidmap && (p = lookupkey(uidmap, id)) != 0)
- return p;
- sprint(s, "%d", id);
- return s;
-}
-
-static char*
-mapgid(int id)
-{
- static char s[12];
- char *p;
-
- if (gidmap && (p = lookupkey(gidmap, id)) != 0)
- return p;
- sprint(s, "%d", id);
- return s;
-}
-
-int
-ext2fs(Xfs *xf)
-{
- SuperBlock superblock;
-
- /* get the super block */
- seek(xf->dev, OFFSET_SUPER_BLOCK, 0);
- if( sizeof(SuperBlock) !=
- read(xf->dev, &superblock, sizeof(SuperBlock)) ){
- chat("can't read super block %r...", xf->dev);
- errno = Eformat;
- return -1;
- }
- if( superblock.s_magic != EXT2_SUPER_MAGIC ){
- chat("Bad super block...");
- errno = Eformat;
- return -1;
- }
- if( !(superblock.s_state & EXT2_VALID_FS) ){
- chat("fs not checked...");
- errno = Enotclean;
- return -1;
- }
-
- xf->block_size = EXT2_MIN_BLOCK_SIZE << superblock.s_log_block_size;
- xf->desc_per_block = xf->block_size / sizeof (GroupDesc);
- xf->inodes_per_group = superblock.s_inodes_per_group;
- xf->inodes_per_block = xf->block_size / sizeof (Inode);
- xf->addr_per_block = xf->block_size / sizeof (uint);
- xf->blocks_per_group = superblock.s_blocks_per_group;
-
- if( xf->block_size == OFFSET_SUPER_BLOCK )
- xf->superaddr = 1, xf->superoff = 0, xf->grpaddr = 2;
- else if( xf->block_size == 2*OFFSET_SUPER_BLOCK ||
- xf->block_size == 4*OFFSET_SUPER_BLOCK )
- xf->superaddr = 0, xf->superoff = OFFSET_SUPER_BLOCK, xf->grpaddr = 1;
- else {
- chat(" blocks of %d bytes are not supported...", xf->block_size);
- errno = Eformat;
- return -1;
- }
-
- chat("good super block...");
-
- xf->ngroups = (superblock.s_blocks_count -
- superblock.s_first_data_block +
- superblock.s_blocks_per_group -1) /
- superblock.s_blocks_per_group;
-
- superblock.s_state &= ~EXT2_VALID_FS;
- superblock.s_mnt_count++;
- seek(xf->dev, OFFSET_SUPER_BLOCK, 0);
- if( !rdonly && sizeof(SuperBlock) !=
- write(xf->dev, &superblock, sizeof(SuperBlock)) ){
- chat("can't write super block...");
- errno = Eio;
- return -1;
- }
-
- return 0;
-}
-Ext2
-getext2(Xfs *xf, char type, int n)
-{
- Iobuf *bd;
- Ext2 e;
-
- switch(type){
- case EXT2_SUPER:
- e.buf = getbuf(xf, xf->superaddr);
- if( !e.buf ) goto error;
- e.u.sb = (SuperBlock *)(e.buf->iobuf + xf->superoff);
- e.type = EXT2_SUPER;
- break;
- case EXT2_DESC:
- e.buf = getbuf(xf, DESC_ADDR(xf, n));
- if( !e.buf ) goto error;
- e.u.gd = DESC_OFFSET(xf, e.buf->iobuf, n);
- e.type = EXT2_DESC;
- break;
- case EXT2_BBLOCK:
- bd = getbuf(xf, DESC_ADDR(xf, n));
- if( !bd ) goto error;
- e.buf = getbuf(xf, DESC_OFFSET(xf, bd->iobuf, n)->bg_block_bitmap);
- if( !e.buf ){
- putbuf(bd);
- goto error;
- }
- putbuf(bd);
- e.u.bmp = (char *)e.buf->iobuf;
- e.type = EXT2_BBLOCK;
- break;
- case EXT2_BINODE:
- bd = getbuf(xf, DESC_ADDR(xf, n));
- if( !bd ) goto error;
- e.buf = getbuf(xf, DESC_OFFSET(xf, bd->iobuf, n)->bg_inode_bitmap);
- if( !e.buf ){
- putbuf(bd);
- goto error;
- }
- putbuf(bd);
- e.u.bmp = (char *)e.buf->iobuf;
- e.type = EXT2_BINODE;
- break;
- default:
- goto error;
- }
- return e;
-error:
- panic("getext2");
- return e;
-}
-int
-get_inode( Xfile *file, uint nr )
-{
- unsigned long block_group, block;
- Xfs *xf = file->xf;
- Ext2 ed, es;
-
- es = getext2(xf, EXT2_SUPER, 0);
- if(nr > es.u.sb->s_inodes_count ){
- chat("inode number %d is too big...", nr);
- putext2(es);
- errno = Eio;
- return -1;
- }
- putext2(es);
- block_group = (nr - 1) / xf->inodes_per_group;
- if( block_group >= xf->ngroups ){
- chat("block group (%d) > groups count...", block_group);
- errno = Eio;
- return -1;
- }
- ed = getext2(xf, EXT2_DESC, block_group);
- block = ed.u.gd->bg_inode_table + (((nr-1) % xf->inodes_per_group) /
- xf->inodes_per_block);
- putext2(ed);
-
- file->bufoffset = (nr-1) % xf->inodes_per_block;
- file->inbr = nr;
- file->bufaddr= block;
-
- return 1;
-}
-int
-get_file( Xfile *f, char *name)
-{
- uint offset, nr, i;
- Xfs *xf = f->xf;
- Inode *inode;
- int nblock;
- DirEntry *dir;
- Iobuf *buf, *ibuf;
-
- if( !S_ISDIR(getmode(f)) )
- return -1;
- ibuf = getbuf(xf, f->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
- nblock = (inode->i_blocks * 512) / xf->block_size;
-
- for(i=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){
- buf = getbuf(xf, inode->i_block[i]);
- if( !buf ){
- putbuf(ibuf);
- return -1;
- }
- for(offset=0 ; offset < xf->block_size ; ){
- dir = (DirEntry *)(buf->iobuf + offset);
- if( dir->name_len==strlen(name) &&
- !strncmp(name, dir->name, dir->name_len) ){
- nr = dir->inode;
- putbuf(buf);
- putbuf(ibuf);
- return nr;
- }
- offset += dir->rec_len;
- }
- putbuf(buf);
-
- }
- putbuf(ibuf);
- errno = Enonexist;
- return -1;
-}
-char *
-getname(Xfile *f, char *str)
-{
- Xfile ft;
- int offset, i, len;
- Xfs *xf = f->xf;
- Inode *inode;
- int nblock;
- DirEntry *dir;
- Iobuf *buf, *ibuf;
-
- ft = *f;
- if( get_inode(&ft, f->pinbr) < 0 )
- return 0;
- if( !S_ISDIR(getmode(&ft)) )
- return 0;
- ibuf = getbuf(xf, ft.bufaddr);
- if( !ibuf )
- return 0;
- inode = ((Inode *)ibuf->iobuf) + ft.bufoffset;
- nblock = (inode->i_blocks * 512) / xf->block_size;
-
- for(i=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){
- buf = getbuf(xf, inode->i_block[i]);
- if( !buf ){
- putbuf(ibuf);
- return 0;
- }
- for(offset=0 ; offset < xf->block_size ; ){
- dir = (DirEntry *)(buf->iobuf + offset);
- if( f->inbr == dir->inode ){
- len = (dir->name_len < EXT2_NAME_LEN) ? dir->name_len : EXT2_NAME_LEN;
- if (str == 0)
- str = malloc(len+1);
- strncpy(str, dir->name, len);
- str[len] = 0;
- putbuf(buf);
- putbuf(ibuf);
- return str;
- }
- offset += dir->rec_len;
- }
- putbuf(buf);
- }
- putbuf(ibuf);
- errno = Enonexist;
- return 0;
-}
-void
-dostat(Qid qid, Xfile *f, Dir *dir )
-{
- Inode *inode;
- Iobuf *ibuf;
- char *name;
-
- memset(dir, 0, sizeof(Dir));
-
- if( f->inbr == EXT2_ROOT_INODE ){
- dir->name = estrdup9p("/");
- dir->qid = (Qid){0,0,QTDIR};
- dir->mode = DMDIR | 0777;
- }else{
- ibuf = getbuf(f->xf, f->bufaddr);
- if( !ibuf )
- return;
- inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
- dir->length = inode->i_size;
- dir->atime = inode->i_atime;
- dir->mtime = inode->i_mtime;
- putbuf(ibuf);
- name = getname(f, 0);
- dir->name = name;
- dir->uid = estrdup9p(mapuid(inode->i_uid));
- dir->gid = estrdup9p(mapgid(inode->i_gid));
- dir->qid = qid;
- dir->mode = getmode(f);
- if( qid.type & QTDIR )
- dir->mode |= DMDIR;
- }
-
-}
-int
-dowstat(Xfile *f, Dir *stat)
-{
- Xfs *xf = f->xf;
- Inode *inode;
- Xfile fdir;
- Iobuf *ibuf;
- char name[EXT2_NAME_LEN+1];
-
- /* change name */
- getname(f, name);
- if( stat->name && stat->name[0] != 0 && strcmp(name, stat->name) ){
-
- /* get dir */
- fdir = *f;
- if( get_inode(&fdir, f->pinbr) < 0 ){
- chat("can't get inode %d...", f->pinbr);
- return -1;
- }
-
- ibuf = getbuf(xf, fdir.bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) +fdir.bufoffset;
-
- /* Clean old dir entry */
- if( delete_entry(xf, inode, f->inbr) < 0 ){
- chat("delete entry failed...");
- putbuf(ibuf);
- return -1;
- }
- putbuf(ibuf);
-
- /* add the new entry */
- if( add_entry(&fdir, stat->name, f->inbr) < 0 ){
- chat("add entry failed...");
- return -1;
- }
-
- }
-
- ibuf = getbuf(xf, f->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
- if (stat->mode != ~0)
- if( (getmode(f) & 0777) != (stat->mode & 0777) ){
- inode->i_mode = (getmode(f) & ~0777) | (stat->mode & 0777);
- dirtybuf(ibuf);
- }
- if (stat->mtime != ~0)
- if( inode->i_mtime != stat->mtime ){
- inode->i_mtime = stat->mtime;
- dirtybuf(ibuf);
- }
-
- putbuf(ibuf);
-
- return 1;
-}
-long
-readfile(Xfile *f, void *vbuf, vlong offset, long count)
-{
- Xfs *xf = f->xf;
- Inode *inode;
- Iobuf *buffer, *ibuf;
- long rcount;
- int len, o, cur_block, baddr;
- uchar *buf;
-
- buf = vbuf;
-
- ibuf = getbuf(xf, f->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
- if( offset >= inode->i_size ){
- putbuf(ibuf);
- return 0;
- }
- if( offset + count > inode->i_size )
- count = inode->i_size - offset;
-
- /* fast link */
- if( S_ISLNK(getmode(f)) && (inode->i_size <= EXT2_N_BLOCKS<<2) ){
- memcpy(&buf[0], ((char *)inode->i_block)+offset, count);
- putbuf(ibuf);
- return count;
- }
- chat("read block [ ");
- cur_block = offset / xf->block_size;
- o = offset % xf->block_size;
- rcount = 0;
- while( count > 0 ){
- baddr = bmap(f, cur_block++);
- if( !baddr ){
- putbuf(ibuf);
- return -1;
- }
- buffer = getbuf(xf, baddr);
- if( !buffer ){
- putbuf(ibuf);
- return -1;
- }
- chat("%d ", baddr);
- len = xf->block_size - o;
- if( len > count )
- len = count;
- memcpy(&buf[rcount], &buffer->iobuf[o], len);
- rcount += len;
- count -= len;
- o = 0;
- putbuf(buffer);
- }
- chat("] ...");
- inode->i_atime = time(0);
- dirtybuf(ibuf);
- putbuf(ibuf);
- return rcount;
-}
-long
-readdir(Xfile *f, void *vbuf, vlong offset, long count)
-{
- int off, i, len;
- long rcount;
- Xfs *xf = f->xf;
- Inode *inode, *tinode;
- int nblock;
- DirEntry *edir;
- Iobuf *buffer, *ibuf, *tbuf;
- Dir pdir;
- Xfile ft;
- uchar *buf;
- char name[EXT2_NAME_LEN+1];
- unsigned int dirlen;
- int index;
-
- buf = vbuf;
- if (offset == 0)
- f->dirindex = 0;
-
- if( !S_ISDIR(getmode(f)) )
- return -1;
-
- ibuf = getbuf(xf, f->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
- nblock = (inode->i_blocks * 512) / xf->block_size;
- ft = *f;
- chat("read block [ ");
- index = 0;
- for(i=0, rcount=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){
-
- buffer = getbuf(xf, inode->i_block[i]);
- if( !buffer ){
- putbuf(ibuf);
- return -1;
- }
- chat("%d, ", buffer->addr);
- for(off=0 ; off < xf->block_size ; ){
-
- edir = (DirEntry *)(buffer->iobuf + off);
- off += edir->rec_len;
- if( (edir->name[0] == '.' ) && (edir->name_len == 1))
- continue;
- if(edir->name[0] == '.' && edir->name[1] == '.' &&
- edir->name_len == 2)
- continue;
- if( edir->inode == 0 ) /* for lost+found dir ... */
- continue;
- if( index++ < f->dirindex )
- continue;
-
- if( get_inode(&ft, edir->inode) < 0 ){
- chat("can't find ino no %d ] ...", edir->inode);
-error: putbuf(buffer);
- putbuf(ibuf);
- return -1;
- }
- tbuf = getbuf(xf, ft.bufaddr);
- if( !tbuf )
- goto error;
- tinode = ((Inode *)tbuf->iobuf) + ft.bufoffset;
-
- memset(&pdir, 0, sizeof(Dir));
-
- /* fill plan9 dir struct */
- pdir.name = name;
- len = (edir->name_len < EXT2_NAME_LEN) ? edir->name_len : EXT2_NAME_LEN;
- strncpy(pdir.name, edir->name, len);
- pdir.name[len] = 0;
-// chat("name %s len %d\n", pdir.name, edir->name_len);
- pdir.uid = mapuid(tinode->i_uid);
- pdir.gid = mapgid(tinode->i_gid);
- pdir.qid.path = edir->inode;
- pdir.mode = tinode->i_mode;
- if( edir->inode == EXT2_ROOT_INODE )
- pdir.qid.path = f->xf->rootqid.path;
- else if( S_ISDIR( tinode->i_mode) )
- pdir.qid.type |= QTDIR;
- if( pdir.qid.type & QTDIR )
- pdir.mode |= DMDIR;
- pdir.length = tinode->i_size;
- pdir.atime = tinode->i_atime;
- pdir.mtime = tinode->i_mtime;
-
- putbuf(tbuf);
-
- dirlen = convD2M(&pdir, &buf[rcount], count-rcount);
- if ( dirlen <= BIT16SZ ) {
- chat("] ...");
- putbuf(buffer);
- putbuf(ibuf);
- return rcount;
- }
- rcount += dirlen;
- f->dirindex++;
-
- }
- putbuf(buffer);
- }
- chat("] ...");
- putbuf(ibuf);
- return rcount;
-}
-int
-bmap( Xfile *f, int block )
-{
- Xfs *xf = f->xf;
- Inode *inode;
- Iobuf *buf, *ibuf;
- int addr;
- int addr_per_block = xf->addr_per_block;
- int addr_per_block_bits = ffz(~addr_per_block);
-
- if(block < 0) {
- chat("bmap() block < 0 ...");
- return 0;
- }
- if(block >= EXT2_NDIR_BLOCKS + addr_per_block +
- (1 << (addr_per_block_bits * 2)) +
- ((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) {
- chat("bmap() block > big...");
- return 0;
- }
-
- ibuf = getbuf(xf, f->bufaddr);
- if( !ibuf )
- return 0;
- inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
- /* direct blocks */
- if(block < EXT2_NDIR_BLOCKS){
- putbuf(ibuf);
- return inode->i_block[block];
- }
- block -= EXT2_NDIR_BLOCKS;
-
- /* indirect blocks*/
- if(block < addr_per_block) {
- addr = inode->i_block[EXT2_IND_BLOCK];
- if (!addr) goto error;
- buf = getbuf(xf, addr);
- if( !buf ) goto error;
- addr = *(((uint *)buf->iobuf) + block);
- putbuf(buf);
- putbuf(ibuf);
- return addr;
- }
- block -= addr_per_block;
-
- /* double indirect blocks */
- if(block < (1 << (addr_per_block_bits * 2))) {
- addr = inode->i_block[EXT2_DIND_BLOCK];
- if (!addr) goto error;
- buf = getbuf(xf, addr);
- if( !buf ) goto error;
- addr = *(((uint *)buf->iobuf) + (block >> addr_per_block_bits));
- putbuf(buf);
- buf = getbuf(xf, addr);
- if( !buf ) goto error;
- addr = *(((uint *)buf->iobuf) + (block & (addr_per_block - 1)));
- putbuf(buf);
- putbuf(ibuf);
- return addr;
- }
- block -= (1 << (addr_per_block_bits * 2));
-
- /* triple indirect blocks */
- addr = inode->i_block[EXT2_TIND_BLOCK];
- if(!addr) goto error;
- buf = getbuf(xf, addr);
- if( !buf ) goto error;
- addr = *(((uint *)buf->iobuf) + (block >> (addr_per_block_bits * 2)));
- putbuf(buf);
- if(!addr) goto error;
- buf = getbuf(xf, addr);
- if( !buf ) goto error;
- addr = *(((uint *)buf->iobuf) +
- ((block >> addr_per_block_bits) & (addr_per_block - 1)));
- putbuf(buf);
- if(!addr) goto error;
- buf = getbuf(xf, addr);
- if( !buf ) goto error;
- addr = *(((uint *)buf->iobuf) + (block & (addr_per_block - 1)));
- putbuf(buf);
- putbuf(ibuf);
- return addr;
-error:
- putbuf(ibuf);
- return 0;
-}
-long
-writefile(Xfile *f, void *vbuf, vlong offset, long count)
-{
- Xfs *xf = f->xf;
- Inode *inode;
- Iobuf *buffer, *ibuf;
- long w;
- int len, o, cur_block, baddr;
- char *buf;
-
- buf = vbuf;
-
- ibuf = getbuf(xf, f->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
- chat("write block [ ");
- cur_block = offset / xf->block_size;
- o = offset % xf->block_size;
- w = 0;
- while( count > 0 ){
- baddr = getblk(f, cur_block++);
- if( baddr <= 0 )
- goto end;
- buffer = getbuf(xf, baddr);
- if( !buffer )
- goto end;
- chat("%d ", baddr);
- len = xf->block_size - o;
- if( len > count )
- len = count;
- memcpy(&buffer->iobuf[o], &buf[w], len);
- dirtybuf(buffer);
- w += len;
- count -= len;
- o = 0;
- putbuf(buffer);
- }
-end:
- if( inode->i_size < offset + w )
- inode->i_size = offset + w;
- inode->i_atime = inode->i_mtime = time(0);
- dirtybuf(ibuf);
- putbuf(ibuf);
- chat("]...");
- if( errno )
- return -1;
- return w;
-}
-int
-new_block( Xfile *f, int goal )
-{
- Xfs *xf= f->xf;
- int group, block, baddr, k, redo;
- ulong lmap;
- char *p, *r;
- Iobuf *buf;
- Ext2 ed, es, eb;
-
- es = getext2(xf, EXT2_SUPER, 0);
- redo = 0;
-
-repeat:
-
- if( goal < es.u.sb->s_first_data_block || goal >= es.u.sb->s_blocks_count )
- goal = es.u.sb->s_first_data_block;
- group = (goal - es.u.sb->s_first_data_block) / xf->blocks_per_group;
-
- ed = getext2(xf, EXT2_DESC, group);
- eb = getext2(xf, EXT2_BBLOCK, group);
-
- /*
- * First, test if goal block is free
- */
- if( ed.u.gd->bg_free_blocks_count > 0 ){
- block = (goal - es.u.sb->s_first_data_block) % xf->blocks_per_group;
-
- if( !test_bit(block, eb.u.bmp) )
- goto got_block;
-
- if( block ){
- /*
- * goal wasn't free ; search foward for a free
- * block within the next 32 blocks
- */
-
- lmap = (((ulong *)eb.u.bmp)[block>>5]) >>
- ((block & 31) + 1);
- if( block < xf->blocks_per_group - 32 )
- lmap |= (((ulong *)eb.u.bmp)[(block>>5)+1]) <<
- ( 31-(block & 31) );
- else
- lmap |= 0xffffffff << ( 31-(block & 31) );
-
- if( lmap != 0xffffffffl ){
- k = ffz(lmap) + 1;
- if( (block + k) < xf->blocks_per_group ){
- block += k;
- goto got_block;
- }
- }
- }
- /*
- * Search in the remaider of the group
- */
- p = eb.u.bmp + (block>>3);
- r = memscan(p, 0, (xf->blocks_per_group - block + 7) >>3);
- k = ( r - eb.u.bmp )<<3;
- if( k < xf->blocks_per_group ){
- block = k;
- goto search_back;
- }
- k = find_next_zero_bit((unsigned long *)eb.u.bmp,
- xf->blocks_per_group>>3, block);
- if( k < xf->blocks_per_group ){
- block = k;
- goto got_block;
- }
- }
-
- /*
- * Search the rest of groups
- */
- putext2(ed); putext2(eb);
- for(k=0 ; k < xf->ngroups ; k++){
- group++;
- if( group >= xf->ngroups )
- group = 0;
- ed = getext2(xf, EXT2_DESC, group);
- if( ed.u.gd->bg_free_blocks_count > 0 )
- break;
- putext2(ed);
- }
- if( redo && group == xf->ngroups-1 ){
- putext2(ed);
- goto full;
- }
- if( k >=xf->ngroups ){
- /*
- * All groups are full or
- * we have retry (because the last block) and all other
- * groups are also full.
- */
-full:
- chat("no free blocks ...");
- putext2(es);
- errno = Enospace;
- return 0;
- }
- eb = getext2(xf, EXT2_BBLOCK, group);
- r = memscan(eb.u.bmp, 0, xf->blocks_per_group>>3);
- block = (r - eb.u.bmp) <<3;
- if( block < xf->blocks_per_group )
- goto search_back;
- else
- block = find_first_zero_bit((ulong *)eb.u.bmp,
- xf->blocks_per_group>>3);
- if( block >= xf->blocks_per_group ){
- chat("Free block count courupted for block group %d...", group);
- putext2(ed); putext2(eb); putext2(es);
- errno = Ecorrupt;
- return 0;
- }
-
-
-search_back:
- /*
- * A free byte was found in the block. Now search backwards up
- * to 7 bits to find the start of this group of free block.
- */
- for(k=0 ; k < 7 && block > 0 &&
- !test_bit(block-1, eb.u.bmp) ; k++, block--);
-
-got_block:
-
- baddr = block + (group * xf->blocks_per_group) +
- es.u.sb->s_first_data_block;
-
- if( baddr == ed.u.gd->bg_block_bitmap ||
- baddr == ed.u.gd->bg_inode_bitmap ){
- chat("Allocating block in system zone...");
- putext2(ed); putext2(eb); putext2(es);
- errno = Eintern;
- return 0;
- }
-
- if( set_bit(block, eb.u.bmp) ){
- chat("bit already set (%d)...", block);
- putext2(ed); putext2(eb); putext2(es);
- errno = Ecorrupt;
- return 0;
- }
- dirtyext2(eb);
-
- if( baddr >= es.u.sb->s_blocks_count ){
- chat("block >= blocks count...");
- errno = Eintern;
-error:
- clear_bit(block, eb.u.bmp);
- putext2(eb); putext2(ed); putext2(es);
- return 0;
- }
-
- buf = getbuf(xf, baddr);
- if( !buf ){
- if( !redo ){
- /*
- * It's perhaps the last block of the disk and
- * it can't be acceded because the last sector.
- * Therefore, we try one more time with goal at 0
- * to force scanning all groups.
- */
- clear_bit(block, eb.u.bmp);
- putext2(eb); putext2(ed);
- goal = 0; errno = 0; redo++;
- goto repeat;
- }
- goto error;
- }
- memset(&buf->iobuf[0], 0, xf->block_size);
- dirtybuf(buf);
- putbuf(buf);
-
- es.u.sb->s_free_blocks_count--;
- dirtyext2(es);
- ed.u.gd->bg_free_blocks_count--;
- dirtyext2(ed);
-
- putext2(eb);
- putext2(ed);
- putext2(es);
- chat("new ");
- return baddr;
-}
-int
-getblk(Xfile *f, int block)
-{
- Xfs *xf = f->xf;
- int baddr;
- int addr_per_block = xf->addr_per_block;
-
- if (block < 0) {
- chat("getblk() block < 0 ...");
- return 0;
- }
- if(block > EXT2_NDIR_BLOCKS + addr_per_block +
- addr_per_block * addr_per_block +
- addr_per_block * addr_per_block * addr_per_block ){
- chat("getblk() block > big...");
- errno = Eintern;
- return 0;
- }
- if( block < EXT2_NDIR_BLOCKS )
- return inode_getblk(f, block);
- block -= EXT2_NDIR_BLOCKS;
- if( block < addr_per_block ){
- baddr = inode_getblk(f, EXT2_IND_BLOCK);
- baddr = block_getblk(f, baddr, block);
- return baddr;
- }
- block -= addr_per_block;
- if( block < addr_per_block * addr_per_block ){
- baddr = inode_getblk(f, EXT2_DIND_BLOCK);
- baddr = block_getblk(f, baddr, block / addr_per_block);
- baddr = block_getblk(f, baddr, block & ( addr_per_block-1));
- return baddr;
- }
- block -= addr_per_block * addr_per_block;
- baddr = inode_getblk(f, EXT2_TIND_BLOCK);
- baddr = block_getblk(f, baddr, block / (addr_per_block * addr_per_block));
- baddr = block_getblk(f, baddr, (block / addr_per_block) & ( addr_per_block-1));
- return block_getblk(f, baddr, block & ( addr_per_block-1));
-}
-int
-block_getblk(Xfile *f, int rb, int nr)
-{
- Xfs *xf = f->xf;
- Inode *inode;
- int tmp, goal = 0;
- int blocks = xf->block_size / 512;
- Iobuf *buf, *ibuf;
- uint *p;
- Ext2 es;
-
- if( !rb )
- return 0;
-
- buf = getbuf(xf, rb);
- if( !buf )
- return 0;
- p = (uint *)(buf->iobuf) + nr;
- if( *p ){
- tmp = *p;
- putbuf(buf);
- return tmp;
- }
-
- for(tmp=nr - 1 ; tmp >= 0 ; tmp--){
- if( ((uint *)(buf->iobuf))[tmp] ){
- goal = ((uint *)(buf->iobuf))[tmp];
- break;
- }
- }
- if( !goal ){
- es = getext2(xf, EXT2_SUPER, 0);
- goal = (((f->inbr -1) / xf->inodes_per_group) *
- xf->blocks_per_group) +
- es.u.sb->s_first_data_block;
- putext2(es);
- }
-
- tmp = new_block(f, goal);
- if( !tmp ){
- putbuf(buf);
- return 0;
- }
-
- *p = tmp;
- dirtybuf(buf);
- putbuf(buf);
-
- ibuf = getbuf(xf, f->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
- inode->i_blocks += blocks;
- dirtybuf(ibuf);
- putbuf(ibuf);
-
- return tmp;
-}
-int
-inode_getblk(Xfile *f, int block)
-{
- Xfs *xf = f->xf;
- Inode *inode;
- Iobuf *ibuf;
- int tmp, goal = 0;
- int blocks = xf->block_size / 512;
- Ext2 es;
-
- ibuf = getbuf(xf, f->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
-
- if( inode->i_block[block] ){
- putbuf(ibuf);
- return inode->i_block[block];
- }
-
- for(tmp=block - 1 ; tmp >= 0 ; tmp--){
- if( inode->i_block[tmp] ){
- goal = inode->i_block[tmp];
- break;
- }
- }
- if( !goal ){
- es = getext2(xf, EXT2_SUPER, 0);
- goal = (((f->inbr -1) / xf->inodes_per_group) *
- xf->blocks_per_group) +
- es.u.sb->s_first_data_block;
- putext2(es);
- }
-
- tmp = new_block(f, goal);
- if( !tmp ){
- putbuf(ibuf);
- return 0;
- }
-
- inode->i_block[block] = tmp;
- inode->i_blocks += blocks;
- dirtybuf(ibuf);
- putbuf(ibuf);
-
- return tmp;
-}
-int
-new_inode(Xfile *f, int mode)
-{
- Xfs *xf = f->xf;
- Inode *inode, *finode;
- Iobuf *buf, *ibuf;
- int ave,group, i, j;
- Ext2 ed, es, eb;
-
- group = -1;
-
- es = getext2(xf, EXT2_SUPER, 0);
-
- if( S_ISDIR(mode) ){ /* create directory inode */
- ave = es.u.sb->s_free_inodes_count / xf->ngroups;
- for(i=0 ; i < xf->ngroups ; i++){
- ed = getext2(xf, EXT2_DESC, i);
- if( ed.u.gd->bg_free_inodes_count &&
- ed.u.gd->bg_free_inodes_count >= ave ){
- if( group<0 || ed.u.gd->bg_free_inodes_count >
- ed.u.gd->bg_free_inodes_count )
- group = i;
- }
- putext2(ed);
- }
-
- }else{ /* create file inode */
- /* Try to put inode in its parent directory */
- i = (f->inbr -1) / xf->inodes_per_group;
- ed = getext2(xf, EXT2_DESC, i);
- if( ed.u.gd->bg_free_inodes_count ){
- group = i;
- putext2(ed);
- }else{
- /*
- * Use a quadratic hash to find a group whith
- * a free inode
- */
- putext2(ed);
- for( j=1 ; j < xf->ngroups ; j <<= 1){
- i += j;
- if( i >= xf->ngroups )
- i -= xf->ngroups;
- ed = getext2(xf, EXT2_DESC, i);
- if( ed.u.gd->bg_free_inodes_count ){
- group = i;
- putext2(ed);
- break;
- }
- putext2(ed);
- }
- }
- if( group < 0 ){
- /* try a linear search */
- i = ((f->inbr -1) / xf->inodes_per_group) + 1;
- for(j=2 ; j < xf->ngroups ; j++){
- if( ++i >= xf->ngroups )
- i = 0;
- ed = getext2(xf, EXT2_DESC, i);
- if( ed.u.gd->bg_free_inodes_count ){
- group = i;
- putext2(ed);
- break;
- }
- putext2(ed);
- }
- }
-
- }
- if( group < 0 ){
- chat("group < 0...");
- putext2(es);
- return 0;
- }
- ed = getext2(xf, EXT2_DESC, group);
- eb = getext2(xf, EXT2_BINODE, group);
- if( (j = find_first_zero_bit(eb.u.bmp,
- xf->inodes_per_group>>3)) < xf->inodes_per_group){
- if( set_bit(j, eb.u.bmp) ){
- chat("inode %d of group %d is already allocated...", j, group);
- putext2(ed); putext2(eb); putext2(es);
- errno = Ecorrupt;
- return 0;
- }
- dirtyext2(eb);
- }else if( ed.u.gd->bg_free_inodes_count != 0 ){
- chat("free inodes count corrupted for group %d...", group);
- putext2(ed); putext2(eb); putext2(es);
- errno = Ecorrupt;
- return 0;
- }
- i = j;
- j += group * xf->inodes_per_group + 1;
- if( j < EXT2_FIRST_INO || j >= es.u.sb->s_inodes_count ){
- chat("reserved inode or inode > inodes count...");
- errno = Ecorrupt;
-error:
- clear_bit(i, eb.u.bmp);
- putext2(eb); putext2(ed); putext2(es);
- return 0;
- }
-
- buf = getbuf(xf, ed.u.gd->bg_inode_table +
- (((j-1) % xf->inodes_per_group) /
- xf->inodes_per_block));
- if( !buf )
- goto error;
- inode = ((struct Inode *) buf->iobuf) +
- ((j-1) % xf->inodes_per_block);
- memset(inode, 0, sizeof(Inode));
- inode->i_mode = mode;
- inode->i_links_count = 1;
- inode->i_uid = DEFAULT_UID;
- inode->i_gid = DEFAULT_GID;
- inode->i_mtime = inode->i_atime = inode->i_ctime = time(0);
- dirtybuf(buf);
-
- ibuf = getbuf(xf, f->bufaddr);
- if( !ibuf ){
- putbuf(buf);
- goto error;
- }
- finode = ((Inode *)ibuf->iobuf) + f->bufoffset;
- inode->i_flags = finode->i_flags;
- inode->i_uid = finode->i_uid;
- inode->i_gid = finode->i_gid;
- dirtybuf(ibuf);
- putbuf(ibuf);
-
- putbuf(buf);
-
- ed.u.gd->bg_free_inodes_count--;
- if( S_ISDIR(mode) )
- ed.u.gd->bg_used_dirs_count++;
- dirtyext2(ed);
-
- es.u.sb->s_free_inodes_count--;
- dirtyext2(es);
-
- putext2(eb);
- putext2(ed);
- putext2(es);
-
- return j;
-}
-int
-create_file(Xfile *fdir, char *name, int mode)
-{
- int inr;
-
- inr = new_inode(fdir, mode);
- if( !inr ){
- chat("create one new inode failed...");
- return -1;
- }
- if( add_entry(fdir, name, inr) < 0 ){
- chat("add entry failed...");
- free_inode(fdir->xf, inr);
- return -1;
- }
-
- return inr;
-}
-void
-free_inode( Xfs *xf, int inr)
-{
- Inode *inode;
- ulong b, bg;
- Iobuf *buf;
- Ext2 ed, es, eb;
-
- bg = (inr -1) / xf->inodes_per_group;
- b = (inr -1) % xf->inodes_per_group;
-
- ed = getext2(xf, EXT2_DESC, bg);
- buf = getbuf(xf, ed.u.gd->bg_inode_table +
- (b / xf->inodes_per_block));
- if( !buf ){
- putext2(ed);
- return;
- }
- inode = ((struct Inode *) buf->iobuf) +
- ((inr-1) % xf->inodes_per_block);
-
- if( S_ISDIR(inode->i_mode) )
- ed.u.gd->bg_used_dirs_count--;
- memset(inode, 0, sizeof(Inode));
- inode->i_dtime = time(0);
- dirtybuf(buf);
- putbuf(buf);
-
- ed.u.gd->bg_free_inodes_count++;
- dirtyext2(ed);
- putext2(ed);
-
- eb = getext2(xf, EXT2_BINODE, bg);
- clear_bit(b, eb.u.bmp);
- dirtyext2(eb);
- putext2(eb);
-
- es = getext2(xf, EXT2_SUPER, 0);
- es.u.sb->s_free_inodes_count++;
- dirtyext2(es); putext2(es);
-}
-int
-create_dir(Xfile *fdir, char *name, int mode)
-{
- Xfs *xf = fdir->xf;
- DirEntry *de;
- Inode *inode;
- Iobuf *buf, *ibuf;
- Xfile tf;
- int inr, baddr;
-
- inr = new_inode(fdir, mode);
- if( inr == 0 ){
- chat("create one new inode failed...");
- return -1;
- }
- if( add_entry(fdir, name, inr) < 0 ){
- chat("add entry failed...");
- free_inode(fdir->xf, inr);
- return -1;
- }
-
- /* create the empty dir */
-
- tf = *fdir;
- if( get_inode(&tf, inr) < 0 ){
- chat("can't get inode %d...", inr);
- free_inode(fdir->xf, inr);
- return -1;
- }
-
- ibuf = getbuf(xf, tf.bufaddr);
- if( !ibuf ){
- free_inode(fdir->xf, inr);
- return -1;
- }
- inode = ((Inode *)ibuf->iobuf) + tf.bufoffset;
-
-
- baddr = inode_getblk(&tf, 0);
- if( !baddr ){
- putbuf(ibuf);
- ibuf = getbuf(xf, fdir->bufaddr);
- if( !ibuf ){
- free_inode(fdir->xf, inr);
- return -1;
- }
- inode = ((Inode *)ibuf->iobuf) + fdir->bufoffset;
- delete_entry(fdir->xf, inode, inr);
- putbuf(ibuf);
- free_inode(fdir->xf, inr);
- return -1;
- }
-
- inode->i_size = xf->block_size;
- buf = getbuf(xf, baddr);
-
- de = (DirEntry *)buf->iobuf;
- de->inode = inr;
- de->name_len = 1;
- de->rec_len = DIR_REC_LEN(de->name_len);
- strcpy(de->name, ".");
-
- de = (DirEntry *)( (char *)de + de->rec_len);
- de->inode = fdir->inbr;
- de->name_len = 2;
- de->rec_len = xf->block_size - DIR_REC_LEN(1);
- strcpy(de->name, "..");
-
- dirtybuf(buf);
- putbuf(buf);
-
- inode->i_links_count = 2;
- dirtybuf(ibuf);
- putbuf(ibuf);
-
- ibuf = getbuf(xf, fdir->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + fdir->bufoffset;
-
- inode->i_links_count++;
-
- dirtybuf(ibuf);
- putbuf(ibuf);
-
- return inr;
-}
-int
-add_entry(Xfile *f, char *name, int inr)
-{
- Xfs *xf = f->xf;
- DirEntry *de, *de1;
- int offset, baddr;
- int rec_len, cur_block;
- int namelen = strlen(name);
- Inode *inode;
- Iobuf *buf, *ibuf;
-
- ibuf = getbuf(xf, f->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
- if( inode->i_size == 0 ){
- chat("add_entry() no entry !!!...");
- putbuf(ibuf);
- return -1;
- }
- cur_block = offset = 0;
- rec_len = DIR_REC_LEN(namelen);
- buf = getbuf(xf, inode->i_block[cur_block++]);
- if( !buf ){
- putbuf(ibuf);
- return -1;
- }
- de = (DirEntry *)buf->iobuf;
-
- for(;;){
- if( ((char *)de) >= (xf->block_size + buf->iobuf) ){
- putbuf(buf);
- if( cur_block >= EXT2_NDIR_BLOCKS ){
- errno = Enospace;
- putbuf(ibuf);
- return -1;
- }
- if( (baddr = inode_getblk(f, cur_block++)) == 0 ){
- putbuf(ibuf);
- return -1;
- }
- buf = getbuf(xf, baddr);
- if( !buf ){
- putbuf(ibuf);
- return -1;
- }
- if( inode->i_size <= offset ){
- de = (DirEntry *)buf->iobuf;
- de->inode = 0;
- de->rec_len = xf->block_size;
- dirtybuf(buf);
- inode->i_size = offset + xf->block_size;
- dirtybuf(ibuf);
- }else{
- de = (DirEntry *)buf->iobuf;
- }
- }
- if( de->inode != 0 && de->name_len == namelen &&
- !strncmp(name, de->name, namelen) ){
- errno = Eexist;
- putbuf(ibuf); putbuf(buf);
- return -1;
- }
- offset += de->rec_len;
- if( (de->inode == 0 && de->rec_len >= rec_len) ||
- (de->rec_len >= DIR_REC_LEN(de->name_len) + rec_len) ){
- if( de->inode ){
- de1 = (DirEntry *) ((char *)de + DIR_REC_LEN(de->name_len));
- de1->rec_len = de->rec_len - DIR_REC_LEN(de->name_len);
- de->rec_len = DIR_REC_LEN(de->name_len);
- de = de1;
- }
- de->inode = inr;
- de->name_len = namelen;
- memcpy(de->name, name, namelen);
- dirtybuf(buf);
- putbuf(buf);
- inode->i_mtime = inode->i_ctime = time(0);
- dirtybuf(ibuf);
- putbuf(ibuf);
- return 0;
- }
- de = (DirEntry *)((char *)de + de->rec_len);
- }
- /* not reached */
-}
-int
-unlink( Xfile *file )
-{
- Xfs *xf = file->xf;
- Inode *dir;
- int bg, b;
- Inode *inode;
- Iobuf *buf, *ibuf;
- Ext2 ed, es, eb;
-
- if( S_ISDIR(getmode(file)) && !empty_dir(file) ){
- chat("non empty directory...");
- errno = Eperm;
- return -1;
- }
-
- es = getext2(xf, EXT2_SUPER, 0);
-
- /* get dir inode */
- if( file->pinbr >= es.u.sb->s_inodes_count ){
- chat("inode number %d is too big...", file->pinbr);
- putext2(es);
- errno = Eintern;
- return -1;
- }
- bg = (file->pinbr - 1) / xf->inodes_per_group;
- if( bg >= xf->ngroups ){
- chat("block group (%d) > groups count...", bg);
- putext2(es);
- errno = Eintern;
- return -1;
- }
- ed = getext2(xf, EXT2_DESC, bg);
- b = ed.u.gd->bg_inode_table +
- (((file->pinbr-1) % xf->inodes_per_group) /
- xf->inodes_per_block);
- putext2(ed);
- buf = getbuf(xf, b);
- if( !buf ){
- putext2(es);
- return -1;
- }
- dir = ((struct Inode *) buf->iobuf) +
- ((file->pinbr-1) % xf->inodes_per_block);
-
- /* Clean dir entry */
-
- if( delete_entry(xf, dir, file->inbr) < 0 ){
- putbuf(buf);
- putext2(es);
- return -1;
- }
- if( S_ISDIR(getmode(file)) ){
- dir->i_links_count--;
- dirtybuf(buf);
- }
- putbuf(buf);
-
- /* clean blocks */
- ibuf = getbuf(xf, file->bufaddr);
- if( !ibuf ){
- putext2(es);
- return -1;
- }
- inode = ((Inode *)ibuf->iobuf) + file->bufoffset;
-
- if( !S_ISLNK(getmode(file)) ||
- (S_ISLNK(getmode(file)) && (inode->i_size > EXT2_N_BLOCKS<<2)) )
- if( free_block_inode(file) < 0 ){
- chat("error while freeing blocks...");
- putext2(es);
- putbuf(ibuf);
- return -1;
- }
-
-
- /* clean inode */
-
- bg = (file->inbr -1) / xf->inodes_per_group;
- b = (file->inbr -1) % xf->inodes_per_group;
-
- eb = getext2(xf, EXT2_BINODE, bg);
- clear_bit(b, eb.u.bmp);
- dirtyext2(eb);
- putext2(eb);
-
- inode->i_dtime = time(0);
- inode->i_links_count--;
- if( S_ISDIR(getmode(file)) )
- inode->i_links_count = 0;
-
- es.u.sb->s_free_inodes_count++;
- dirtyext2(es);
- putext2(es);
-
- ed = getext2(xf, EXT2_DESC, bg);
- ed.u.gd->bg_free_inodes_count++;
- if( S_ISDIR(getmode(file)) )
- ed.u.gd->bg_used_dirs_count--;
- dirtyext2(ed);
- putext2(ed);
-
- dirtybuf(ibuf);
- putbuf(ibuf);
-
- return 1;
-}
-int
-empty_dir(Xfile *dir)
-{
- Xfs *xf = dir->xf;
- int nblock;
- uint offset, i,count;
- DirEntry *de;
- Inode *inode;
- Iobuf *buf, *ibuf;
-
- if( !S_ISDIR(getmode(dir)) )
- return 0;
-
- ibuf = getbuf(xf, dir->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + dir->bufoffset;
- nblock = (inode->i_blocks * 512) / xf->block_size;
-
- for(i=0, count=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){
- buf = getbuf(xf, inode->i_block[i]);
- if( !buf ){
- putbuf(ibuf);
- return 0;
- }
- for(offset=0 ; offset < xf->block_size ; ){
- de = (DirEntry *)(buf->iobuf + offset);
- if(de->inode)
- count++;
- offset += de->rec_len;
- }
- putbuf(buf);
- if( count > 2 ){
- putbuf(ibuf);
- return 0;
- }
- }
- putbuf(ibuf);
- return 1;
-}
-int
-free_block_inode(Xfile *file)
-{
- Xfs *xf = file->xf;
- int i, j, k;
- ulong b, *y, *z;
- uint *x;
- int naddr;
- Inode *inode;
- Iobuf *buf, *buf1, *buf2, *ibuf;
-
- ibuf = getbuf(xf, file->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + file->bufoffset;
-
- for(i=0 ; i < EXT2_IND_BLOCK ; i++){
- x = inode->i_block + i;
- if( *x == 0 ){ putbuf(ibuf); return 0; }
- free_block(xf, *x);
- }
- naddr = xf->addr_per_block;
-
- /* indirect blocks */
-
- if( (b=inode->i_block[EXT2_IND_BLOCK]) ){
- buf = getbuf(xf, b);
- if( !buf ){ putbuf(ibuf); return -1; }
- for(i=0 ; i < naddr ; i++){
- x = ((uint *)buf->iobuf) + i;
- if( *x == 0 ) break;
- free_block(xf, *x);
- }
- free_block(xf, b);
- putbuf(buf);
- }
-
- /* double indirect block */
-
- if( (b=inode->i_block[EXT2_DIND_BLOCK]) ){
- buf = getbuf(xf, b);
- if( !buf ){ putbuf(ibuf); return -1; }
- for(i=0 ; i < naddr ; i++){
- x = ((uint *)buf->iobuf) + i;
- if( *x== 0 ) break;
- buf1 = getbuf(xf, *x);
- if( !buf1 ){ putbuf(buf); putbuf(ibuf); return -1; }
- for(j=0 ; j < naddr ; j++){
- y = ((ulong *)buf1->iobuf) + j;
- if( *y == 0 ) break;
- free_block(xf, *y);
- }
- free_block(xf, *x);
- putbuf(buf1);
- }
- free_block(xf, b);
- putbuf(buf);
- }
-
- /* triple indirect block */
-
- if( (b=inode->i_block[EXT2_TIND_BLOCK]) ){
- buf = getbuf(xf, b);
- if( !buf ){ putbuf(ibuf); return -1; }
- for(i=0 ; i < naddr ; i++){
- x = ((uint *)buf->iobuf) + i;
- if( *x == 0 ) break;
- buf1 = getbuf(xf, *x);
- if( !buf1 ){ putbuf(buf); putbuf(ibuf); return -1; }
- for(j=0 ; j < naddr ; j++){
- y = ((ulong *)buf1->iobuf) + j;
- if( *y == 0 ) break;
- buf2 = getbuf(xf, *y);
- if( !buf2 ){ putbuf(buf); putbuf(buf1); putbuf(ibuf); return -1; }
- for(k=0 ; k < naddr ; k++){
- z = ((ulong *)buf2->iobuf) + k;
- if( *z == 0 ) break;
- free_block(xf, *z);
- }
- free_block(xf, *y);
- putbuf(buf2);
- }
- free_block(xf, *x);
- putbuf(buf1);
- }
- free_block(xf, b);
- putbuf(buf);
- }
-
- putbuf(ibuf);
- return 0;
-}
-void free_block( Xfs *xf, ulong block )
-{
- ulong bg;
- Ext2 ed, es, eb;
-
- es = getext2(xf, EXT2_SUPER, 0);
-
- bg = (block - es.u.sb->s_first_data_block) / xf->blocks_per_group;
- block = (block - es.u.sb->s_first_data_block) % xf->blocks_per_group;
-
- eb = getext2(xf, EXT2_BBLOCK, bg);
- clear_bit(block, eb.u.bmp);
- dirtyext2(eb);
- putext2(eb);
-
- es.u.sb->s_free_blocks_count++;
- dirtyext2(es);
- putext2(es);
-
- ed = getext2(xf, EXT2_DESC, bg);
- ed.u.gd->bg_free_blocks_count++;
- dirtyext2(ed);
- putext2(ed);
-
-}
-int
-delete_entry(Xfs *xf, Inode *inode, int inbr)
-{
- int nblock = (inode->i_blocks * 512) / xf->block_size;
- uint offset, i;
- DirEntry *de, *pde;
- Iobuf *buf;
-
- if( !S_ISDIR(inode->i_mode) )
- return -1;
-
- for(i=0 ; (i < nblock) && (i < EXT2_NDIR_BLOCKS) ; i++){
- buf = getbuf(xf, inode->i_block[i]);
- if( !buf )
- return -1;
- pde = 0;
- for(offset=0 ; offset < xf->block_size ; ){
- de = (DirEntry *)(buf->iobuf + offset);
- if( de->inode == inbr ){
- if( pde )
- pde->rec_len += de->rec_len;
- de->inode = 0;
- dirtybuf(buf);
- putbuf(buf);
- return 1;
- }
- offset += de->rec_len;
- pde = de;
- }
- putbuf(buf);
-
- }
- errno = Enonexist;
- return -1;
-}
-int
-truncfile(Xfile *f)
-{
- Inode *inode;
- Iobuf *ibuf;
- chat("trunc(fid=%d) ...", f->fid);
- ibuf = getbuf(f->xf, f->bufaddr);
- if( !ibuf )
- return -1;
- inode = ((Inode *)ibuf->iobuf) + f->bufoffset;
-
- if( free_block_inode(f) < 0 ){
- chat("error while freeing blocks...");
- putbuf(ibuf);
- return -1;
- }
- inode->i_atime = inode->i_mtime = time(0);
- inode->i_blocks = 0;
- inode->i_size = 0;
- memset(inode->i_block, 0, EXT2_N_BLOCKS*sizeof(ulong));
- dirtybuf(ibuf);
- putbuf(ibuf);
- chat("trunc ok...");
- return 0;
-}
-long
-getmode(Xfile *f)
-{
- Iobuf *ibuf;
- long mode;
-
- ibuf = getbuf(f->xf, f->bufaddr);
- if( !ibuf )
- return -1;
- mode = (((Inode *)ibuf->iobuf) + f->bufoffset)->i_mode;
- putbuf(ibuf);
- return mode;
-}
-void
-CleanSuper(Xfs *xf)
-{
- Ext2 es;
-
- es = getext2(xf, EXT2_SUPER, 0);
- es.u.sb->s_state = EXT2_VALID_FS;
- dirtyext2(es);
- putext2(es);
-}
-int
-test_bit(int i, void *data)
-{
- char *pt = (char *)data;
-
- return pt[i>>3] & (0x01 << (i&7));
-}
-
-int
-set_bit(int i, void *data)
-{
- char *pt;
-
- if( test_bit(i, data) )
- return 1; /* bit already set !!! */
-
- pt = (char *)data;
- pt[i>>3] |= (0x01 << (i&7));
-
- return 0;
-}
-
-int
-clear_bit(int i, void *data)
-{
- char *pt;
-
- if( !test_bit(i, data) )
- return 1; /* bit already clear !!! */
-
- pt = (char *)data;
- pt[i>>3] &= ~(0x01 << (i&7));
-
- return 0;
-}
-void *
-memscan( void *data, int c, int count )
-{
- char *pt = (char *)data;
-
- while( count ){
- if( *pt == c )
- return (void *)pt;
- count--;
- pt++;
- }
- return (void *)pt;
-}
-
-int
-find_first_zero_bit( void *data, int count /* in byte */)
-{
- char *pt = (char *)data;
- int n, i;
-
- n = 0;
-
- while( n < count ){
- for(i=0 ; i < 8 ; i++)
- if( !(*pt & (0x01 << (i&7))) )
- return (n<<3) + i;
- n++; pt++;
- }
- return n << 3;
-}
-
-int
-find_next_zero_bit( void *data, int count /* in byte */, int where)
-{
- char *pt = (((char *)data) + (where >> 3));
- int n, i;
-
- n = where >> 3;
- i = where & 7;
-
- while( n < count ){
- for(; i < 8 ; i++)
- if( !(*pt & (0x01 << (i&7))) )
- return (n<<3) + i;
- n++; pt++; i=0;
- }
- return n << 3;
-}
-int
-ffz( int x )
-{
- int c = 0;
- while( x&1 ){
- c++;
- x >>= 1;
- }
- return c;
-}
--- a/sys/src/cmd/ext2srv/fns.h
+++ /dev/null
@@ -1,70 +1,0 @@
-void chat(char*, ...);
-Xfile * clean(Xfile*);
-void dirdump(void*);
-int dosfs(Xfs*);
-int emptydir(Xfile*);
-int falloc(Xfs*);
-int fileaddr(Xfile*, int, int);
-int getfat(Xfs*, int);
-int getfile(Xfile*);
-Xfs * getxfs(char*);
-void panic(char*, ...);
-void putfat(Xfs*, int, int);
-void putfile(Xfile*);
-void refxfs(Xfs*, int);
-long writefile(Xfile*, void*, vlong, long);
-char * xerrstr(int);
-Xfile * xfile(Fid*, int);
-int xfspurge(void);
-
-int ext2fs(Xfs *);
-int get_inode( Xfile *, uint);
-char *getname(Xfile *, char *);
-int get_file(Xfile *, char *);
-int bmap( Xfile *f, int block );
-int ffz(int);
-long readdir(Xfile*, void*, vlong, long);
-long readfile(Xfile*, void*, vlong, long);
-void dostat(Qid, Xfile *, Dir *);
-int new_block( Xfile *, int);
-int test_bit(int, void *);
-int set_bit(int, void *);
-int clear_bit(int , void *);
-void *memscan(void *, int, int);
-int find_first_zero_bit(void *, int);
-int find_next_zero_bit(void *, int, int);
-int block_getblk(Xfile *, int, int);
-int inode_getblk(Xfile *, int);
-int getblk(Xfile *, int);
-int new_inode(Xfile *, int);
-int add_entry(Xfile *, char *, int);
-int create_file(Xfile *, char *, int);
-int create_dir(Xfile *, char *, int);
-int unlink(Xfile *);
-int delete_entry(Xfs *, Inode *, int);
-int free_block_inode(Xfile *);
-void free_block( Xfs *, ulong);
-void free_inode( Xfs *, int);
-int empty_dir(Xfile *);
-int truncfile(Xfile *);
-int dowstat(Xfile *, Dir *);
-long getmode(Xfile *);
-Ext2 getext2(Xfs *, char, int);
-void CleanSuper(Xfs *);
-
-/* Iobuf operations */
-
-Iobuf *getbuf(Xfs *, long addr);
-void putbuf(Iobuf *);
-void purgebuf(Xfs *);
-void iobuf_init(void);
-int xread(Xfs *, Iobuf *, long);
-void syncbuf(void);
-void xwrite(Iobuf *);
-void dirtybuf(Iobuf *);
-
-void mchat(char *fmt, ...);
-void dumpbuf(void);
-
-void gidfile(char*);
-void uidfile(char*);
--- a/sys/src/cmd/ext2srv/iobuf.c
+++ /dev/null
@@ -1,174 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-#define NIOBUF 100
-#define HIOB (NIOBUF/3)
-
-static Iobuf* hiob[HIOB]; /* hash buckets */
-static Iobuf iobuf[NIOBUF]; /* buffer headers */
-static Iobuf* iohead;
-static Iobuf* iotail;
-
-Iobuf*
-getbuf(Xfs *dev, long addr)
-{
- Iobuf *p, *h, **l, **f;
-
- l = &hiob[addr%HIOB];
- for(p = *l; p; p = p->hash) {
- if(p->addr == addr && p->dev == dev) {
- p->busy++;
- return p;
- }
- }
- /* Find a non-busy buffer from the tail */
- for(p = iotail; p && (p->busy > 0); p = p->prev)
- ;
- if(!p)
- panic("all buffers busy");
- if(p->dirty){
- xwrite(p);
- p->dirty = 0;
- }
-
- if( xread(dev, p, addr) < 0)
- return 0;
- /* Delete from hash chain */
- f = &hiob[p->addr%HIOB];
- if( *f == p )
- *f = p->hash;
- else {
- for(h = *f; h ; h = h->hash)
- if( h->hash == p ){
- h->hash = p->hash;
- break;
- }
- }
- /* Fill and hash */
- p->hash = *l;
- *l = p;
- p->addr = addr;
- p->dev = dev;
- p->busy=1;
-
- return p;
-}
-void
-putbuf(Iobuf *p)
-{
- if(p->busy <= 0)
- panic("putbuf");
- p->busy--;
-
- /* Link onto head for lru */
- if(p == iohead)
- return;
- if( p == iotail ){
- p->prev->next = 0;
- iotail = p->prev;
- }else{
- p->prev->next = p->next;
- p->next->prev = p->prev;
- }
-
- p->prev = 0;
- p->next = iohead;
- iohead->prev = p;
- iohead = p;
-}
-void
-dirtybuf(Iobuf *p)
-{
- if(p->busy <=0)
- panic("dirtybuf");
- p->dirty = 1;
-}
-void
-syncbuf(void)
-{
- Iobuf *p;
-
- for(p=&iobuf[0] ; p<&iobuf[NIOBUF]; p++)
- if( p->dirty ){
- xwrite(p);
- p->dirty = 0;
- }
-}
-void
-purgebuf(Xfs *dev)
-{
- Iobuf *p;
-
- for(p=&iobuf[0]; p<&iobuf[NIOBUF]; p++)
- if(p->dev == dev)
- p->busy = 0;
-
- /* Blow hash chains */
- memset(hiob, 0, sizeof(hiob));
-}
-void
-iobuf_init(void)
-{
- Iobuf *p;
-
- iohead = iobuf;
- iotail = iobuf+NIOBUF-1;
-
- for(p = iobuf; p <= iotail; p++) {
- p->next = p+1;
- p->prev = p-1;
-
- p->iobuf = (char *)malloc(EXT2_MAX_BLOCK_SIZE);
- if(p->iobuf == 0)
- panic("iobuf_init");
- }
-
- iohead->prev = 0;
- iotail->next = 0;
-}
-int
-xread(Xfs *dev, Iobuf *p, long addr)
-{
- /*chat("xread %d,%d...", dev->dev, addr);*/
-
- seek(dev->dev, (vlong)addr*dev->block_size, 0);
- if(read(dev->dev, p->iobuf, dev->block_size) != dev->block_size){
- chat("xread %d, block=%d failed ...", dev->dev, addr);
- errno = Eio;
- return -1;
- }
- /*chat("xread ok...");*/
- return 0;
-}
-void
-xwrite(Iobuf *p)
-{
- Xfs *dev;
- long addr;
-
- dev = p->dev;
- addr = p->addr;
- /*chat("xwrite %d,%d...", dev->dev, addr);*/
-
- seek(dev->dev, (vlong)addr*dev->block_size, 0);
- if(write(dev->dev, p->iobuf, dev->block_size) != dev->block_size){
- chat("xwrite %d, block=%d failed ...", dev->dev, addr);
- errno = Eio;
- return;
- }
- /*chat("xwrite ok...");*/
-}
-void
-dumpbuf(void)
-{
- Iobuf *p;
-
- for(p = iotail; p ; p = p->prev)
- if( p->busy )
- mchat("\nHi ERROR buf(%x, %d, %d)\n", p, p->addr, p->busy);
-}
--- a/sys/src/cmd/ext2srv/mkfile
+++ /dev/null
@@ -1,18 +1,0 @@
-</$objtype/mkfile
-
-TARG=ext2srv
-OFILES=\
- xfssrv.$O\
- xfile.$O\
- ext2fs.$O\
- ext2subs.$O\
- chat.$O\
- iobuf.$O\
-
-HFILES=dat.h\
- fns.h\
-
-BIN=/$objtype/bin
-</sys/src/cmd/mkone
-
-xfssrv.$O: errstr.h
--- a/sys/src/cmd/ext2srv/readme
+++ /dev/null
@@ -1,53 +1,0 @@
-Ext2srv Version 0.2
-----------------
-
-Ext2srv is a file server that interprets EXT2 file systems. Ext2srv is identical
-to dossrv in specification.
-
-I added just one option. By default ext2srv search for the first ext2 partition
-on the device (typically a disk) given by the mount spec option (see bind(1)).
-So, if you have different ext2 partitions on the same disk you can select one
-of them by adding the partition number at the end of the device in the mount
-system call. For example
-
- mount -c /srv/ext2 /n/linux /dev/hd1disk:3
-
-forces the server to look for ext2 filesystem on the third partition of your second
-hard drive.
-
-
-WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
-
- Ext2srv uses some cache. So you must unmount the directory where you
- mount your ext2 partition. It's the only way to synchronise dirty buffers
- with the disk.
-
- Don't reboot your terminal (^t^t r) without explicitly unmount.
-
- Using something like this script is recommended :
-
- #!/bin/rc
-
- unmount /n/linux >[2] /dev/null
- unmount /n/linux2 >[2] /dev/null
- disk/kfscmd halt
-
-WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
-
-
-I provide this software `as is' and without any warranty. Feed back are welcome !!!
-
[email protected]
-
-changes 5/17/2000 - threw away partition table
-walking, fixed name_len (it's a uchar not a ushort).
-
--rsc
-
-changes for 4th edition 13 May 2002 - [email protected]
- - adapted for 9P2000
- - added [-p passwd] [-g group] args as in tapefs(4)
- - create makes files with user and group of parent directory (not 100/200)
- - prevent writing to non-regular files
- - correct calculation of group descriptor block location when bsize!=1024
-
--- a/sys/src/cmd/ext2srv/version
+++ /dev/null
@@ -1,36 +1,0 @@
-# ext2srv
-# [bl]
-
-on trouve le numero de version sur les 2 premières lignes du
-fichier ext2subs.c.
-
-
-Version 0.1 :
-
-1) il n'ya plus de copie d'inode
- tous les iobuf utilisés dans un fonction
- sont libérés. Un getbuf() => Un putbuf.
-
-2) Tous dans les iobufs : super, group desc et bitmaps
-
-3) Il n'ya plus aucune reférence au contenu d'une inode dans la
- structure Xfile.
-
-4) Choix de la parition en passant /dev/hd?disk:n lors du mount
-
-
-Version 0.11 :
-
-1) -v affiche les blocks manipulés en lecture et écriture [18/10/96]
-
-2) bug pour open avec TRUNC sur les liens... fixed [19/10/96]
-
-3) maintenant on jette si la taille des blocks != 1024 dans ext2fs() [21/10/96]
- (c'est quand même mieux pour le moment ...)
-
-
-Version 0.20 :
-
-1) les blocks de 1024, 2048, 4096 octets sont supportés. [22/10/96]
-
-2) le bug sur le qid.vers est détecté mais non corrigé...
\ No newline at end of file
--- a/sys/src/cmd/ext2srv/xfile.c
+++ /dev/null
@@ -1,161 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-
-static Xfs *xhead;
-static Xfile *freelist;
-static Lock xlock, freelock;
-
-int client;
-
-Xfs *
-getxfs(char *name)
-{
- int fd;
- Dir *dir;
- Xfs *xf, *fxf;
-
- if(name==0 || name[0]==0)
- name = deffile;
- if(name == 0){
- errno = Enofilsys;
- return 0;
- }
- fd = open(name, rdonly ? OREAD : ORDWR);
- if(fd < 0){
- errno = Enonexist;
- return 0;
- }
- if((dir = dirfstat(fd)) == 0){
- errno = Eio;
- close(fd);
- return 0;
- }
- lock(&xlock);
- for(fxf=0, xf=xhead; xf; xf=xf->next){
- if(xf->ref == 0){
- if(fxf == 0)
- fxf = xf;
- continue;
- }
- if(xf->qid.path != dir->qid.path || xf->qid.vers != dir->qid.vers)
- continue;
- if(strcmp(xf->name, name) != 0 || xf->dev < 0)
- continue;
- chat("incref \"%s\", dev=%d...", xf->name, xf->dev);
- ++xf->ref;
- unlock(&xlock);
- close(fd);
- free(dir);
- return xf;
- }
- if(fxf==0){
- fxf = malloc(sizeof(Xfs));
- if(fxf==0){
- unlock(&xlock);
- close(fd);
- free(dir);
- errno = Enomem;
- return 0;
- }
- fxf->next = xhead;
- xhead = fxf;
- }
- chat("alloc \"%s\", dev=%d...", name, fd);
- fxf->name = strdup(name);
- fxf->ref = 1;
- fxf->qid = dir->qid;
- fxf->dev = fd;
- fxf->fmt = 0;
- fxf->ptr = 0;
- free(dir);
- if( ext2fs(fxf)<0 ){
- xhead = fxf->next;
- free(fxf);
- unlock(&xlock);
- return 0;
- }
- unlock(&xlock);
- return fxf;
-}
-
-void
-refxfs(Xfs *xf, int delta)
-{
- lock(&xlock);
- xf->ref += delta;
- if(xf->ref == 0){
- /*mchat("free \"%s\", dev=%d...", xf->name, xf->dev);
- dumpbuf();*/
- CleanSuper(xf);
- syncbuf();
- free(xf->name);
- purgebuf(xf);
- if(xf->dev >= 0){
- close(xf->dev);
- xf->dev = -1;
- }
- }
- unlock(&xlock);
-}
-
-Xfile *
-xfile(Fid *fid, int flag)
-{
- Xfile *f;
-
- f = (Xfile*)fid->aux;
- switch(flag){
- default:
- panic("xfile");
- case Asis:
- return (f && f->xf && f->xf->dev < 0) ? 0 : f;
- case Clean:
- if (f) chat("Clean and fid->aux already exists\n");
- break;
- case Clunk:
- if(f){
- clean(f);
- lock(&freelock);
- f->next = freelist;
- freelist = f;
- unlock(&freelock);
- fid->aux = 0;
- }
- return 0;
- }
- if(f)
- return clean(f);
- lock(&freelock);
- if(f = freelist){ /* assign = */
- freelist = f->next;
- unlock(&freelock);
- } else {
- unlock(&freelock);
- f = malloc(sizeof(Xfile));
- }
- fid->aux = f;
- f->fid = fid->fid;
- f->client = client;
- f->xf = 0;
- f->ptr = 0;
- f->root = 0;
- return f;
-}
-Xfile *
-clean(Xfile *f)
-{
- if(f->xf && f->root){
- refxfs(f->xf, -1);
- f->xf = 0;
- }
- f->xf = 0;
- f->root = 0;
- f->dirindex = 0;
- return f;
-}
--- a/sys/src/cmd/ext2srv/xfssrv.c
+++ /dev/null
@@ -1,91 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <fcall.h>
-#include <thread.h>
-#include <9p.h>
-#include "dat.h"
-#include "fns.h"
-
-#include "errstr.h"
-
-int errno;
-int rdonly;
-char *srvfile;
-char *deffile;
-
-extern void iobuf_init(void);
-extern Srv ext2srv;
-
-void
-usage(void)
-{
- fprint(2, "usage: %s [-v] [-s] [-r] [-p passwd] [-g group] [-f devicefile] [srvname]\n", argv0);
- exits("usage");
-}
-
-/*void handler(void *v, char *sig)
-{
- USED(v,sig);
- syncbuf();
- noted(NDFLT);
-}*/
-
-void
-main(int argc, char **argv)
-{
- int stdio;
-
- stdio = 0;
- ARGBEGIN{
- case 'D':
- ++chatty9p;
- break;
- case 'v':
- ++chatty;
- break;
- case 'f':
- deffile = ARGF();
- break;
- case 'g':
- gidfile(ARGF());
- break;
- case 'p':
- uidfile(ARGF());
- break;
- case 's':
- stdio = 1;
- break;
- case 'r':
- rdonly = 1;
- break;
- default:
- usage();
- }ARGEND
-
- if(argc == 0)
- srvfile = "ext2";
- else if(argc == 1)
- srvfile = argv[0];
- else
- usage();
-
- iobuf_init();
- /*notify(handler);*/
-
- if(stdio){
- srv(&ext2srv);
- }else{
- chat("%s %d: serving %s\n", argv0, getpid(), srvfile);
- postmountsrv(&ext2srv, srvfile, 0, 0);
- }
- exits(0);
-}
-
-char *
-xerrstr(int e)
-{
- if (e < 0 || e >= sizeof errmsg/sizeof errmsg[0])
- return "no such error";
- else
- return errmsg[e];
-}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/LICENSE
@@ -1,0 +1,36 @@
+Files include/tree.h and include/queue.h include their own respective
+(BSD 4-clause) license, as well as ext4_hash.c (BSD 2-clause).
+
+The rest of the files are licensed under BSD 3-clause:
+
+Copyright (c) 2013-2017 Grzegorz Kostka ([email protected])
+Copyright (c) 2015-2017 Kaho Ng ([email protected])
+Copyright (c) 2020-2024 Sigrid Solveig Haflínudóttir ([email protected])
+
+HelenOS: Copyright (c) 2012 Martin Sucha
+ Copyright (c) 2012 Frantisek Princ
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+- Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+- The name of the author may not be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- /dev/null
+++ b/sys/src/cmd/ext4srv/NOTES
@@ -1,0 +1,5 @@
+Based on https://github.com/gkostka/lwext4
+
+None of the sources are GPL-licensed:
+ * xattr handling removed altogether
+ * extents logic replaced with a BSD-3 licensed one found in https://github.com/ngkaho1234/lwext4
--- /dev/null
+++ b/sys/src/cmd/ext4srv/common.h
@@ -1,0 +1,50 @@
+typedef struct Opts Opts;
+typedef struct Part Part;
+/* The pragmas below declare that the print verbs "Ð" and "M" take a Part* argument. */
+#pragma varargck type "Ð" Part*
+#pragma varargck type "M" Part*
+/* Options handed to openpart(). NOTE(review): field meanings are not visible from this header -- confirm against the users. */
+struct Opts {
+ char *group;
+ int cachewb;
+ int linkmode;
+ int asroot;
+ int rdonly;
+
+ int fstype;
+ int blksz;
+ int inodesz;
+ u32int ninode;
+ char *label;
+};
+/* Object returned by openpart() and taken by closepart(); Ref and QLock are embedded as unnamed members. */
+struct Part {
+ Ref;
+ QLock;
+ Part *prev, *next; /* presumably links in a list of open partitions -- TODO confirm */
+
+ char dev[32];
+ char mnt[32];
+ char *partdev;
+
+ struct ext4_blockdev bdev;
+ struct ext4_blockdev_iface bdif;
+ struct ext4_sblock *sb;
+ struct ext4_lock oslocks;
+ Qid qid;
+ Qid qidmask;
+ Groups groups;
+ int f;
+ uchar blkbuf[]; /* flexible array member: must stay last, its size is chosen at allocation */
+};
+/* NOTE(review): these look like the values for Opts.linkmode -- confirm. */
+enum {
+ Lhide, /* implicitly 0 */
+ Lresolve = 1,
+};
+
+Part *openpart(char *dev, Opts *opts);
+void closepart(Part *p);
+void closeallparts(void);
+void statallparts(void);
+void syncallparts(void);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4.c
@@ -1,0 +1,2961 @@
+#include "ext4_config.h"
+#include "ext4.h"
+#include "ext4_trans.h"
+#include "ext4_fs.h"
+#include "ext4_dir.h"
+#include "ext4_inode.h"
+#include "ext4_super.h"
+#include "ext4_block_group.h"
+#include "ext4_dir_idx.h"
+#include "ext4_journal.h"
+
+char Eexists[] = "file exists";
+char Einval[] = "invalid operation";
+char Eio[] = "i/o error";
+char Enomem[] = "no memory";
+char Enospc[] = "no space";
+char Enotfound[] = "file not found";
+char Eperm[] = "permission denied";
+char Erdonlyfs[] = "read-only fs";
+
+/**@brief Mount point OS dependent lock*/
+#define EXT4_MP_LOCK(_m) \
+ do { \
+ if ((_m)->os_locks) \
+ (_m)->os_locks->lock((_m)->os_locks->p_user); \
+ } while (0)
+
+/**@brief Mount point OS dependent unlock*/
+#define EXT4_MP_UNLOCK(_m) \
+ do { \
+ if ((_m)->os_locks) \
+ (_m)->os_locks->unlock((_m)->os_locks->p_user); \
+ } while (0)
+
+/**@brief Mount point descriptor.*/
+struct ext4_mountpoint {
+
+ /**@brief Mount done flag.*/
+ bool mounted;
+
+ /**@brief Mount point name (@ref ext4_mount)*/
+ char name[CONFIG_EXT4_MAX_MP_NAME + 1];
+
+ /**@brief OS dependent lock/unlock functions.*/
+ const struct ext4_lock *os_locks;
+
+ /**@brief Ext4 filesystem internals.*/
+ struct ext4_fs fs;
+
+ /**@brief JBD fs.*/
+ struct jbd_fs jbd_fs;
+
+ /**@brief Journal.*/
+ struct jbd_journal jbd_journal;
+
+ /**@brief Block cache.*/
+ struct ext4_bcache bc;
+};
+
+/**@brief Block devices descriptor.*/
+struct ext4_block_devices {
+
+ /**@brief Block device name.*/
+ char name[CONFIG_EXT4_MAX_BLOCKDEV_NAME + 1];
+
+ /**@brief Block device handle.*/
+ struct ext4_blockdev *bd;
+};
+
+/**@brief Block devices.*/
+static struct ext4_block_devices s_bdevices[CONFIG_EXT4_BLOCKDEVS_COUNT];
+
+/**@brief Mountpoints.*/
+static struct ext4_mountpoint s_mp[CONFIG_EXT4_MOUNTPOINTS_COUNT];
+
+int ext4_device_register(struct ext4_blockdev *bd,
+			 const char *dev_name)
+{
+	struct ext4_block_devices *slot, *end = s_bdevices + CONFIG_EXT4_BLOCKDEVS_COUNT;
+	/* Bind bd to dev_name in s_bdevices; 0 on success, -1 with errstr set. */
+	assert(bd && dev_name);
+	if (strlen(dev_name) > CONFIG_EXT4_MAX_BLOCKDEV_NAME) {
+		werrstr("dev name too long: %s", dev_name);
+		return -1;
+	}
+	/* Refuse a name that any slot already carries. */
+	for (slot = s_bdevices; slot < end; slot++) {
+		if (strcmp(slot->name, dev_name) == 0) {
+			werrstr("dev already exists: %s", dev_name);
+			return -1;
+		}
+	}
+	/* Take the first slot that has no device bound. */
+	for (slot = s_bdevices; slot < end; slot++) {
+		if (slot->bd)
+			continue;
+		strcpy(slot->name, dev_name);
+		slot->bd = bd;
+		return 0;
+	}
+	werrstr("dev limit reached");
+	return -1;
+}
+
+int ext4_device_unregister(const char *dev_name)
+{
+	struct ext4_block_devices *slot = s_bdevices;
+	/* Zero the first slot named dev_name; -1 with errstr set if none matches. */
+	assert(dev_name);
+	for (; slot < s_bdevices + CONFIG_EXT4_BLOCKDEVS_COUNT; slot++) {
+		if (strcmp(slot->name, dev_name) != 0)
+			continue;
+		memset(slot, 0, sizeof(*slot));
+		return 0;
+	}
+	werrstr("dev not found: %s", dev_name);
+	return -1;
+}
+
+int ext4_device_unregister_all(void)
+{
+ memset(s_bdevices, 0, sizeof(s_bdevices));
+ /* Every slot's name and bd have just been zeroed; this always reports 0. */
+ return 0;
+}
+
+static bool ext4_is_dots(const u8int *name, usize name_size)
+{
+	/* True only for the names "." and ".." (the whole name, not a prefix). */
+
+	if (name_size < 1 || name_size > 2 || name[0] != '.')
+		return false;
+	if (name_size == 2 && name[1] != '.')
+		return false;
+	return true;
+}
+
+static int ext4_has_children(bool *has_children, struct ext4_inode_ref *enode)
+{
+	/*
+	 * Set *has_children to whether enode is a directory holding an in-use
+	 * entry other than "." and "..". Returns 0, or the directory
+	 * iterator's error, in which case *has_children is not written.
+	 */
+	struct ext4_sblock *sb = &enode->fs->sb;
+	struct ext4_dir_iter it;
+	bool found = false;
+	u16int nsize;
+	int rc;
+
+	/* Anything that is not a directory is reported as childless. */
+	if (!ext4_inode_is_type(sb, enode->inode, EXT4_INODE_MODE_DIRECTORY)) {
+		*has_children = false;
+		return 0;
+	}
+	rc = ext4_dir_iterator_init(&it, enode, 0);
+	if (rc != 0)
+		return rc;
+
+	while (it.curr != nil) {
+		/* an entry with inode number 0 is skipped as empty */
+		if (ext4_dir_en_get_inode(it.curr) != 0) {
+			nsize = ext4_dir_en_get_name_len(sb, it.curr);
+			found = !ext4_is_dots(it.curr->name, nsize);
+			if (found)
+				break;
+		}
+		rc = ext4_dir_iterator_next(&it);
+		if (rc != 0) {
+			ext4_dir_iterator_fini(&it);
+			return rc;
+		}
+	}
+
+	rc = ext4_dir_iterator_fini(&it);
+	if (rc == 0)
+		*has_children = found;
+	return rc;
+}
+
+static int ext4_link(struct ext4_mountpoint *mp, struct ext4_inode_ref *parent,
+		     struct ext4_inode_ref *ch, const char *n,
+		     u32int len, bool rename)
+{
+	/* Link ch into parent under the first len bytes of n; n may continue past len. */
+	if (len > EXT4_DIRECTORY_FILENAME_LEN) {
+		werrstr("entry name too long: %s", n);
+		return -1;
+	}
+
+	/* Add entry to parent directory; a nonzero result is returned unchanged */
+	int r = ext4_dir_add_entry(parent, n, len, ch);
+	if (r != 0)
+		return r;
+
+	/* Fill new dir -> add '.' and '..' entries, but only when no index is
+	 * set up: ext4_dir_dx_init() is presumably what supplies them otherwise.
+	 * Also newly allocated inode should have 0 link count.
+	 */
+	bool is_dir = ext4_inode_is_type(&mp->fs.sb, ch->inode,
+					 EXT4_INODE_MODE_DIRECTORY);
+	if (is_dir && !rename) {
+		/* Initialize directory index if supported */
+		if (ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_DIR_INDEX)) {
+			r = ext4_dir_dx_init(ch, parent);
+			if (r != 0)
+				return r;
+
+			ext4_inode_set_flag(ch->inode, EXT4_INODE_FLAG_INDEX);
+			ch->dirty = true;
+		} else {
+			r = ext4_dir_add_entry(ch, ".", strlen("."), ch);
+			if (r != 0) {
+				ext4_dir_remove_entry(parent, n, len);
+				return r;
+			}
+
+			r = ext4_dir_add_entry(ch, "..", strlen(".."), parent);
+			if (r != 0) {
+				ext4_dir_remove_entry(parent, n, len);
+				ext4_dir_remove_entry(ch, ".", strlen("."));
+				return r;
+			}
+		}
+
+		/*New empty directory. Two links (. and ..) */
+		ext4_inode_set_links_cnt(ch->inode, 2);
+		ext4_fs_inode_links_count_inc(parent);
+		ch->dirty = true;
+		parent->dirty = true;
+		return r;
+	}
+	/*
+	 * In case we want to rename a directory,
+	 * we reset the original '..' pointer.
+	 */
+	if (is_dir) {
+		bool idx;
+		idx = ext4_inode_has_flag(ch->inode, EXT4_INODE_FLAG_INDEX);
+		struct ext4_dir_search_result res;
+		if (!idx) {
+			r = ext4_dir_find_entry(&res, ch, "..", strlen(".."));
+			if (r != 0) {
+				werrstr(Eio);
+				return -1;
+			}
+
+			ext4_dir_en_set_inode(res.dentry, parent->index);
+			ext4_trans_set_block_dirty(res.block.buf);
+			r = ext4_dir_destroy_result(ch, &res);
+			if (r != 0)
+				return r;
+
+		} else {
+			r = ext4_dir_dx_reset_parent_inode(ch, parent->index);
+			if (r != 0)
+				return r;
+		}
+
+		ext4_fs_inode_links_count_inc(parent);
+		parent->dirty = true;
+	}
+	if (!rename) {
+		ext4_fs_inode_links_count_inc(ch);
+		ch->dirty = true;
+	}
+
+	return r;
+}
+
+static int ext4_unlink(struct ext4_mountpoint *mp,
+		       struct ext4_inode_ref *parent,
+		       struct ext4_inode_ref *child, const char *name,
+		       u32int name_len)
+{
+	/*
+	 * Remove the entry name (name_len bytes) for child from parent and lower
+	 * the link counts involved. Returns 0, the result of the failing helper,
+	 * or -1 with errstr set when child still has children.
+	 */
+	bool nonempty, isdir;
+	int err;
+
+	err = ext4_has_children(&nonempty, child);
+	if (err != 0)
+		return err;
+	if (nonempty) {
+		werrstr("remove -- directory not empty");
+		return -1;
+	}
+
+	err = ext4_dir_remove_entry(parent, name, name_len);
+	if (err != 0)
+		return err;
+
+	/* Undo the parent link that ext4_link() adds for a directory child. */
+	isdir = ext4_inode_is_type(&mp->fs.sb, child->inode,
+				   EXT4_INODE_MODE_DIRECTORY);
+	if (isdir) {
+		ext4_fs_inode_links_count_dec(parent);
+		parent->dirty = true;
+	}
+
+	/*
+	 * TODO: Update timestamps of the parent
+	 * (when we have wall-clock time).
+	 *
+	 * ext4_inode_set_change_inode_time(parent->inode, (u32int) now);
+	 * ext4_inode_set_modification_time(parent->inode, (u32int) now);
+	 * parent->dirty = true;
+	 *
+	 * TODO: Update timestamp for inode.
+	 *
+	 * ext4_inode_set_change_inode_time(child->inode, (u32int) now);
+	 */
+	/* The count is only lowered while it is still above zero. */
+	if (ext4_inode_get_links_cnt(child->inode) != 0) {
+		ext4_fs_inode_links_count_dec(child);
+		child->dirty = true;
+	}
+	return 0;
+}
+
+int ext4_mount(const char *dev_name, const char *mount_point,
+	       bool read_only)
+{
+	int r;
+	u32int bsize;
+	struct ext4_bcache *bc;
+	struct ext4_blockdev *bd = 0;
+	struct ext4_mountpoint *mp = 0;
+
+	/* Mount the registered device dev_name at mount_point: 0 on success, nonzero on failure. */
+	assert(mount_point && dev_name);
+	usize mp_len = strlen(mount_point);
+	if (mp_len > CONFIG_EXT4_MAX_MP_NAME) {
+		werrstr("mount point name too long: %s", mount_point);
+		return -1;
+	}
+	/* Test the length first: an empty name has no last byte to look at. */
+	if (mp_len == 0 || mount_point[mp_len - 1] != '/') {
+		werrstr("invalid mount point: %s", mount_point);
+		return -1;
+	}
+
+	for (usize i = 0; i < CONFIG_EXT4_BLOCKDEVS_COUNT; ++i) {
+		if (!strcmp(dev_name, s_bdevices[i].name)) {
+			bd = s_bdevices[i].bd;
+			break;
+		}
+	}
+	if (!bd) {
+		werrstr("dev not found: %s", dev_name);
+		return -1;
+	}
+
+	for (usize i = 0; i < CONFIG_EXT4_MOUNTPOINTS_COUNT; ++i) {
+		if (!s_mp[i].mounted) {
+			strcpy(s_mp[i].name, mount_point);
+			s_mp[i].mounted = 1;
+			mp = &s_mp[i];
+			break;
+		}
+		/* a slot in use under this name counts as already mounted */
+		if (!strcmp(s_mp[i].name, mount_point))
+			return 0;
+	}
+
+	if (!mp) {
+		werrstr("memory");
+		return -1;
+	}
+
+	r = ext4_block_init(bd);
+	if (r != 0)
+		goto Failslot;
+
+	r = ext4_fs_init(&mp->fs, bd, read_only);
+	if (r != 0)
+		goto Failblock;
+
+	bsize = ext4_sb_get_block_size(&mp->fs.sb);
+	ext4_block_set_lb_size(bd, bsize);
+	bc = &mp->bc;
+
+	r = ext4_bcache_init_dynamic(bc, CONFIG_BLOCK_DEV_CACHE_SIZE, bsize);
+	if (r != 0)
+		goto Failblock;
+
+	if (bsize != bc->itemsize) {
+		werrstr("unsupported block size: %d", bsize);
+		r = -1;
+		goto Failcache;
+	}
+
+	/*Bind block cache to block device*/
+	r = ext4_block_bind_bcache(bd, bc);
+	if (r != 0)
+		goto Failcache;
+
+	bd->fs = &mp->fs;
+	return r;
+
+Failcache:
+	ext4_bcache_cleanup(bc);
+	ext4_bcache_fini_dynamic(bc);
+Failblock:
+	ext4_block_fini(bd);
+Failslot:
+	mp->mounted = 0;	/* or a later mount of this name would be taken for mounted */
+	return r;
+}
+
+static struct ext4_mountpoint *ext4_get_mount(const char *path)
+{
+	struct ext4_mountpoint *mp;
+
+	/* First mounted slot whose name is a prefix of path; nil with errstr set if none. */
+	for (mp = s_mp; mp < s_mp + CONFIG_EXT4_MOUNTPOINTS_COUNT; mp++) {
+		if (mp->mounted && strncmp(mp->name, path, strlen(mp->name)) == 0)
+			return mp;
+	}
+	werrstr("mount point not found: %s", path);
+	return nil;
+}
+
+int ext4_umount(const char *mount_point)
+{
+	/* Tear down the mount that ext4_get_mount() resolves mount_point to. */
+	struct ext4_mountpoint *mp = ext4_get_mount(mount_point);
+	int r;
+
+	if (mp == nil)
+		return -1;
+
+	/* If ext4_fs_fini fails the slot stays mounted and the cache is kept. */
+	r = ext4_fs_fini(&mp->fs);
+	if (r == 0) {
+		mp->mounted = 0;
+		ext4_bcache_cleanup(mp->fs.bdev->bc);
+		ext4_bcache_fini_dynamic(mp->fs.bdev->bc);
+		r = ext4_block_fini(mp->fs.bdev);
+	}
+
+	/* The device's fs pointer is cleared on both outcomes. */
+	mp->fs.bdev->fs = nil;
+	return r;
+}
+
+int ext4_journal_start(const char *mount_point)
+{
+	/*
+	 * Attach the journal of mount_point. A read-only mount or a filesystem
+	 * without EXT4_FCOM_HAS_JOURNAL is left alone and reports 0.
+	 */
+	struct ext4_mountpoint *mp = ext4_get_mount(mount_point);
+	struct ext4_fs *fs;
+	int r;
+
+	if (mp == nil)
+		return -1;
+	fs = &mp->fs;
+	if (fs->read_only || !ext4_sb_feature_com(&fs->sb, EXT4_FCOM_HAS_JOURNAL))
+		return 0;
+	r = jbd_get_fs(fs, &mp->jbd_fs);
+	if (r != 0)
+		return r;
+	r = jbd_journal_start(&mp->jbd_fs, &mp->jbd_journal);
+	if (r != 0) {
+		mp->jbd_fs.dirty = false;
+		jbd_put_fs(&mp->jbd_fs);
+		return r;
+	}
+	fs->jbd_fs = &mp->jbd_fs;
+	fs->jbd_journal = &mp->jbd_journal;
+	return 0;
+}
+
+int ext4_journal_stop(const char *mount_point)
+{
+	/*
+	 * Stop the journal of mount_point and release its jbd_fs. A read-only
+	 * mount or a filesystem without EXT4_FCOM_HAS_JOURNAL reports 0 and
+	 * is left alone; otherwise fs.jbd_fs and fs.jbd_journal end up nil
+	 * whether or not the stop worked.
+	 */
+	struct ext4_mountpoint *mp = ext4_get_mount(mount_point);
+	struct ext4_fs *fs;
+	int r;
+
+	if (mp == nil)
+		return -1;
+
+	fs = &mp->fs;
+	if (fs->read_only || !ext4_sb_feature_com(&fs->sb, EXT4_FCOM_HAS_JOURNAL))
+		return 0;
+
+	r = jbd_journal_stop(&mp->jbd_journal);
+	if (r != 0) {
+		/* report the stop error; the result of this release is dropped */
+		mp->jbd_fs.dirty = false;
+		jbd_put_fs(&mp->jbd_fs);
+	} else {
+		r = jbd_put_fs(&mp->jbd_fs);
+	}
+
+	fs->jbd_journal = nil;
+	fs->jbd_fs = nil;
+
+	return r;
+}
+
+int ext4_recover(const char *mount_point)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(mount_point);
+	struct jbd_fs *jbd_fs;
+	int r = 0;
+	/* Run jbd_recover() on the mount's journal, then refresh the superblock's free counts. */
+	if (!mp)
+		return -1;
+	EXT4_MP_LOCK(mp);
+	if (!ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_HAS_JOURNAL))
+		goto Finish;	/* no journal: r is still 0, and the lock must be dropped */
+
+	jbd_fs = ext4_calloc(1, sizeof(struct jbd_fs));
+	if (!jbd_fs) {
+		werrstr("memory");
+		r = -1;
+		goto Finish;
+	}
+
+	r = jbd_get_fs(&mp->fs, jbd_fs);
+	if (r != 0) {
+		ext4_free(jbd_fs);
+		goto Finish;
+	}
+
+	r = jbd_recover(jbd_fs);
+	jbd_put_fs(jbd_fs);
+	ext4_free(jbd_fs);
+
+	if (r == 0 && !mp->fs.read_only) {
+		u32int bgid;
+		u64int free_blocks_count = 0;
+		u32int free_inodes_count = 0;
+		struct ext4_block_group_ref bg_ref;
+
+		/* Update superblock's stats */
+		for (bgid = 0;bgid < ext4_block_group_cnt(&mp->fs.sb);bgid++) {
+			r = ext4_fs_get_block_group_ref(&mp->fs, bgid, &bg_ref);
+			if (r != 0)
+				goto Finish;
+
+			free_blocks_count +=
+				ext4_bg_get_free_blocks_count(bg_ref.block_group,
+						&mp->fs.sb);
+			free_inodes_count +=
+				ext4_bg_get_free_inodes_count(bg_ref.block_group,
+						&mp->fs.sb);
+
+			ext4_fs_put_block_group_ref(&bg_ref);
+		}
+		ext4_sb_set_free_blocks_cnt(&mp->fs.sb, free_blocks_count);
+		ext4_set32(&mp->fs.sb, free_inodes_count, free_inodes_count);
+		/* We don't need to save the superblock stats immediately. */
+	}
+
+Finish:
+	EXT4_MP_UNLOCK(mp);
+	return r;
+}
+
+int ext4_trans_start(struct ext4_mountpoint *mp)
+{
+	/* Open a journal transaction on mp; 0 and no action if there is no journal or one is already open. */
+	struct ext4_fs *fs = &mp->fs;
+	struct jbd_trans *trans;
+
+	if (fs->jbd_journal == nil || fs->curr_trans != nil)
+		return 0;
+
+	trans = jbd_journal_new_trans(fs->jbd_journal);
+	if (trans == nil) {
+		werrstr("memory");
+		return -1;
+	}
+	fs->curr_trans = trans;
+
+	return 0;
+}
+
+int ext4_trans_stop(struct ext4_mountpoint *mp)
+{
+	/* Commit mp's current transaction, if any, and clear it whatever the result. */
+	struct ext4_fs *fs = &mp->fs;
+	int r;
+
+	if (fs->jbd_journal == nil || fs->curr_trans == nil)
+		return 0;
+	r = jbd_journal_commit_trans(fs->jbd_journal, fs->curr_trans);
+	fs->curr_trans = nil;
+	return r;
+}
+
+void ext4_trans_abort(struct ext4_mountpoint *mp)
+{
+	/* Free mp's current transaction without committing it; no-op when none is open. */
+	struct ext4_fs *fs = &mp->fs;
+	if (fs->jbd_journal == nil || fs->curr_trans == nil)
+		return;
+	jbd_journal_free_trans(fs->jbd_journal, fs->curr_trans, true);
+	fs->curr_trans = nil;
+}
+
+int ext4_mount_point_stats(const char *mount_point,
+			   struct ext4_mount_stats *stats)
+{
+	/* Copy the superblock counters of mount_point into *stats while holding the mount lock. */
+	struct ext4_mountpoint *mp = ext4_get_mount(mount_point);
+	struct ext4_sblock *sb;
+
+	if (mp == nil)
+		return -1;
+	sb = &mp->fs.sb;
+	EXT4_MP_LOCK(mp);
+	stats->inodes_count = ext4_get32(sb, inodes_count);
+	stats->free_inodes_count = ext4_get32(sb, free_inodes_count);
+	stats->blocks_count = ext4_sb_get_blocks_cnt(sb);
+	stats->free_blocks_count = ext4_sb_get_free_blocks_cnt(sb);
+	stats->block_size = ext4_sb_get_block_size(sb);
+	stats->block_group_count = ext4_block_group_cnt(sb);
+	stats->blocks_per_group = ext4_get32(sb, blocks_per_group);
+	stats->inodes_per_group = ext4_get32(sb, inodes_per_group);
+	/* 16 raw bytes are copied; no terminator is added here */
+	memcpy(stats->volume_name, sb->volume_name, 16);
+	EXT4_MP_UNLOCK(mp);
+	return 0;
+}
+
+int ext4_mount_setup_locks(const char *mount_point,
+			   const struct ext4_lock *locks)
+{
+	/*
+	 * Install locks on the first slot named exactly mount_point. The mounted
+	 * flag is not consulted, unlike the prefix lookup in ext4_get_mount().
+	 * Returns 0, or -1 with errstr set when no slot carries that name.
+	 */
+	struct ext4_mountpoint *mp;
+
+	for (mp = s_mp; mp < s_mp + CONFIG_EXT4_MOUNTPOINTS_COUNT; mp++) {
+		if (strcmp(mp->name, mount_point) == 0) {
+			mp->os_locks = locks;
+			return 0;
+		}
+	}
+
+	werrstr("mount point not found: %s", mount_point);
+	return -1;
+}
+
+/********************************FILE OPERATIONS*****************************/
+
+static int ext4_path_check(const char *path, bool *is_goal)
+{
+	int i;
+	/* Length of path's first component; *is_goal says whether it ended at
+	 * the NUL (true) or at '/' (false). Returns 0 and leaves *is_goal alone
+	 * if no end is found within EXT4_DIRECTORY_FILENAME_LEN name bytes;
+	 * the bound is inclusive so a name of exactly that length is accepted. */
+	for (i = 0; i <= EXT4_DIRECTORY_FILENAME_LEN; ++i) {
+		if (path[i] == '/') {
+			*is_goal = false;
+			return i;
+		}
+		if (path[i] == 0) {
+			*is_goal = true;
+			return i;
+		}
+	}
+	return 0;
+}
+
+static bool ext4_parse_flags(const char *flags, u32int *file_flags)
+{
+	/* every accepted mode string and the open flags it stands for */
+	static const struct {
+		const char *mode;
+		u32int bits;
+	} modes[] = {
+		{ "r", O_RDONLY },
+		{ "rb", O_RDONLY },
+		{ "w", O_WRONLY | O_CREAT | O_TRUNC },
+		{ "wb", O_WRONLY | O_CREAT | O_TRUNC },
+		{ "a", O_WRONLY | O_CREAT | O_APPEND },
+		{ "ab", O_WRONLY | O_CREAT | O_APPEND },
+		{ "r+", O_RDWR },
+		{ "rb+", O_RDWR },
+		{ "r+b", O_RDWR },
+		{ "w+", O_RDWR | O_CREAT | O_TRUNC },
+		{ "wb+", O_RDWR | O_CREAT | O_TRUNC },
+		{ "w+b", O_RDWR | O_CREAT | O_TRUNC },
+		{ "a+", O_RDWR | O_CREAT | O_APPEND },
+		{ "ab+", O_RDWR | O_CREAT | O_APPEND },
+		{ "a+b", O_RDWR | O_CREAT | O_APPEND },
+	};
+	usize i;
+
+	/*
+	 * Translate flags into *file_flags. Returns false, leaving
+	 * *file_flags untouched, for nil or an unrecognised string.
+	 */
+	if (flags == nil)
+		return false;
+
+	for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++) {
+		if (strcmp(flags, modes[i].mode) != 0)
+			continue;
+		*file_flags = modes[i].bits;
+		return true;
+	}
+	return false;
+}
+
+static int ext4_trunc_inode(struct ext4_mountpoint *mp,
+			    u32int index, u64int new_size)
+{
+	/* Shrink inode number index down to new_size, at most CONFIG_MAX_TRUNCATE_SIZE
+	 * bytes per journal transaction; an inode already that small is left alone.
+	 * A transaction open on entry is stopped first. Returns 0 or the failing step's result. */
+	int r;
+	struct ext4_fs *const fs = &mp->fs;
+	struct ext4_inode_ref inode_ref;
+	u64int inode_size;
+	bool has_trans = mp->fs.jbd_journal && mp->fs.curr_trans;
+	r = ext4_fs_get_inode_ref(fs, index, &inode_ref);
+	if (r != 0)
+		return r;
+
+	inode_size = ext4_inode_get_size(&fs->sb, inode_ref.inode);
+	ext4_fs_put_inode_ref(&inode_ref);
+	if (has_trans)
+		ext4_trans_stop(mp);
+
+	while (inode_size > new_size + CONFIG_MAX_TRUNCATE_SIZE) {
+		inode_size -= CONFIG_MAX_TRUNCATE_SIZE;
+
+		ext4_trans_start(mp);
+		r = ext4_fs_get_inode_ref(fs, index, &inode_ref);
+		if (r != 0) {
+			ext4_trans_abort(mp);
+			goto Finish;
+		}
+		r = ext4_fs_truncate_inode(&inode_ref, inode_size);
+		if (r != 0)
+			ext4_fs_put_inode_ref(&inode_ref);
+		else
+			r = ext4_fs_put_inode_ref(&inode_ref);
+
+		if (r != 0) {
+			ext4_trans_abort(mp);
+			goto Finish;
+		} else
+			ext4_trans_stop(mp);
+	}
+
+	if (inode_size > new_size) {
+		inode_size = new_size;
+
+		ext4_trans_start(mp);
+		r = ext4_fs_get_inode_ref(fs, index, &inode_ref);
+		if (r != 0) {
+			ext4_trans_abort(mp);
+			goto Finish;
+		}
+		r = ext4_fs_truncate_inode(&inode_ref, inode_size);
+		if (r != 0)
+			ext4_fs_put_inode_ref(&inode_ref);
+		else
+			r = ext4_fs_put_inode_ref(&inode_ref);
+
+		if (r != 0)
+			ext4_trans_abort(mp);
+		else
+			ext4_trans_stop(mp);
+	}
+
+Finish:
+	/* start a transaction again if the caller had one open on entry */
+	if (has_trans)
+		ext4_trans_start(mp);
+
+	return r;
+}
+
+static int ext4_trunc_dir(struct ext4_mountpoint *mp,
+			  struct ext4_inode_ref *parent,
+			  struct ext4_inode_ref *dir)
+{
+	/*
+	 * Truncate dir through ext4_trunc_inode() to one block, or to
+	 * EXT4_DIR_DX_INIT_BCNT blocks after ext4_dir_dx_init() when the fs has
+	 * EXT4_FCOM_DIR_INDEX, then to 0. -1 with errstr set if not a directory.
+	 */
+	bool is_dir = ext4_inode_is_type(&mp->fs.sb, dir->inode,
+					 EXT4_INODE_MODE_DIRECTORY);
+	u32int block_size = ext4_sb_get_block_size(&mp->fs.sb);
+	int dx;
+	int r;
+
+	if (!is_dir) {
+		werrstr("not a directory");
+		return -1;
+	}
+	dx = ext4_sb_feature_com(&mp->fs.sb, EXT4_FCOM_DIR_INDEX) != 0;
+	if (dx) {
+		r = ext4_dir_dx_init(dir, parent);
+		if (r != 0)
+			return r;
+	}
+	r = ext4_trunc_inode(mp, dir->index,
+			     dx ? EXT4_DIR_DX_INIT_BCNT * block_size : block_size);
+	if (r != 0)
+		return r;
+	return ext4_fs_truncate_inode(dir, 0);
+}
+
+/*
+ * Resolve path component by component, starting at the root inode of the
+ * mount point it belongs to, and describe the final component in f.
+ *
+ * f            filled in (mp, fsize, inode, fpos) once the final component
+ *              has been reached; f->mp is cleared first, so it stays 0 on
+ *              every failure.
+ * flags        O_CREAT creates missing components (intermediate ones as
+ *              directories, the last one as ftype) and retries the lookup;
+ *              O_TRUNC cuts a regular file to zero length; O_APPEND starts
+ *              with fpos at the end of the file.
+ * ftype        expected directory-entry type of the final component.
+ *              NOTICE: if it is EXT4_DE_UNKNOWN, any type is accepted.
+ * parent_inode optional; receives the index of the directory in which the
+ *              most recently found entry was looked up.
+ * name_off     optional; receives the offset within the original path of the
+ *              component being looked up last.
+ *
+ * Returns 0 on success.  Returns -1 when the path has no mount point, when
+ * O_CREAT is requested on a read-only file system, or when a component is
+ * missing or of the wrong type (Enotfound); otherwise the error of the
+ * failing lower-level call.
+ *
+ * NOTE(review): when ext4_fs_put_inode_ref or the following
+ * ext4_fs_get_inode_ref on `ref` fails inside the loop, the common error
+ * path puts `ref` once more - confirm that this is harmless.
+ */
+static int ext4_generic_open2(ext4_file *f, const char *path, int flags,
+			      int ftype, u32int *parent_inode,
+			      u32int *name_off)
+{
+	bool is_goal = false;
+	u32int imode = EXT4_INODE_MODE_DIRECTORY;
+	u32int next_inode;
+
+	int r;
+	int len;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_dir_search_result result;
+	struct ext4_inode_ref ref;
+
+	f->mp = 0;
+
+	if (!mp)
+		return -1;
+
+	struct ext4_fs *const fs = &mp->fs;
+	struct ext4_sblock *const sb = &mp->fs.sb;
+
+	if (fs->read_only && flags & O_CREAT) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	f->flags = flags;
+
+	/*Skip mount point*/
+	path += strlen(mp->name);
+
+	if (name_off)
+		*name_off = strlen(mp->name);
+
+	/*Load root*/
+	r = ext4_fs_get_inode_ref(fs, EXT4_INODE_ROOT_INDEX, &ref);
+	if (r != 0)
+		return r;
+
+	if (parent_inode)
+		*parent_inode = ref.index;
+
+	while (1) {
+
+		len = ext4_path_check(path, &is_goal);
+		if (!len) {
+			/*If root open was requested.*/
+			if (ftype == EXT4_DE_DIR || ftype == EXT4_DE_UNKNOWN)
+				if (is_goal)
+					break;
+
+			/* also the target of the type-mismatch jumps below */
+Notfound:
+			werrstr(Enotfound);
+			r = -1;
+			break;
+		}
+
+		r = ext4_dir_find_entry(&result, &ref, path, len);
+		if (r != 0) {
+
+			/*Destroy last result*/
+			ext4_dir_destroy_result(&ref, &result);
+			if (r != EXT4_ERR_NOT_FOUND)
+				break;
+
+			if (!(f->flags & O_CREAT))
+				break;
+
+			/*O_CREAT allows creating a new entry*/
+			struct ext4_inode_ref child_ref;
+			r = ext4_fs_alloc_inode(fs, &child_ref,
+					is_goal ? ftype : EXT4_DE_DIR);
+
+			if (r != 0)
+				break;
+
+			ext4_fs_inode_blocks_init(fs, &child_ref);
+
+			/*Link with the directory currently being searched.*/
+			r = ext4_link(mp, &ref, &child_ref, path, len, false);
+			if (r != 0) {
+				/*Fail. Free new inode.*/
+				ext4_fs_free_inode(&child_ref);
+				/*We do not want to write new inode.
+				  But block has to be released.*/
+				child_ref.dirty = false;
+				ext4_fs_put_inode_ref(&child_ref);
+				break;
+			}
+
+			/* look the freshly created entry up again */
+			ext4_fs_put_inode_ref(&child_ref);
+			continue;
+		}
+
+		if (parent_inode)
+			*parent_inode = ref.index;
+
+		next_inode = ext4_dir_en_get_inode(result.dentry);
+		if (ext4_sb_feature_incom(sb, EXT4_FINCOM_FILETYPE)) {
+			/* the directory entry carries the type itself */
+			u8int t;
+			t = ext4_dir_en_get_inode_type(sb, result.dentry);
+			imode = ext4_fs_correspond_inode_mode(t);
+		} else {
+			/* no type in the entry: ask the inode */
+			struct ext4_inode_ref child_ref;
+			r = ext4_fs_get_inode_ref(fs, next_inode, &child_ref);
+			if (r != 0) {
+				/* the search result is still held; release it
+				 * before bailing out, as every other exit does */
+				ext4_dir_destroy_result(&ref, &result);
+				break;
+			}
+
+			imode = ext4_inode_type(sb, child_ref.inode);
+			ext4_fs_put_inode_ref(&child_ref);
+		}
+
+		r = ext4_dir_destroy_result(&ref, &result);
+		if (r != 0)
+			break;
+
+		/*A non-directory in the middle of the path is an error*/
+		if (imode != EXT4_INODE_MODE_DIRECTORY && !is_goal)
+			goto Notfound;
+
+		if (ftype != EXT4_DE_UNKNOWN) {
+			bool df = imode != ext4_fs_correspond_inode_mode(ftype);
+			if (df && is_goal)
+				goto Notfound;
+		}
+
+		/* descend: swap the reference for the entry just found */
+		r = ext4_fs_put_inode_ref(&ref);
+		if (r != 0)
+			break;
+
+		r = ext4_fs_get_inode_ref(fs, next_inode, &ref);
+		if (r != 0)
+			break;
+
+		if (is_goal)
+			break;
+
+		path += len + 1;
+
+		if (name_off)
+			*name_off += len + 1;
+	}
+
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&ref);
+		return r;
+	}
+
+	if (is_goal) {
+		if ((f->flags & O_TRUNC) && (imode == EXT4_INODE_MODE_FILE)) {
+			r = ext4_trunc_inode(mp, ref.index, 0);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&ref);
+				return r;
+			}
+		}
+
+		f->mp = mp;
+		f->fsize = ext4_inode_get_size(sb, ref.inode);
+		f->inode = ref.index;
+		f->fpos = 0;
+
+		if (f->flags & O_APPEND)
+			f->fpos = f->fsize;
+	}
+
+	return ext4_fs_put_inode_ref(&ref);
+}
+
+/****************************************************************************/
+
+/*
+ * Front end of ext4_generic_open2 taking the flags as a string.
+ *
+ * The string is converted with ext4_parse_flags; -1 is returned when that
+ * fails.  file_expect selects whether a regular file or a directory is
+ * expected.  When the parsed flags contain O_CREAT the lookup is wrapped in
+ * a transaction, stopped on success and aborted on failure.
+ */
+static int ext4_generic_open(ext4_file *f, const char *path, const char *flags,
+			     bool file_expect, u32int *parent_inode,
+			     u32int *name_off)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	u32int iflags;
+	bool creating;
+	int want;
+	int rc;
+
+	if (ext4_parse_flags(flags, &iflags) == false)
+		return -1;
+
+	want = (file_expect == true) ? EXT4_DE_REG_FILE : EXT4_DE_DIR;
+	creating = (iflags & O_CREAT) != 0;
+
+	if (creating)
+		ext4_trans_start(mp);
+
+	rc = ext4_generic_open2(f, path, iflags, want, parent_inode, name_off);
+
+	if (!creating)
+		return rc;
+
+	if (rc != 0)
+		ext4_trans_abort(mp);
+	else
+		ext4_trans_stop(mp);
+
+	return rc;
+}
+
+/*
+ * Link child_ref under the name given by the last component of path.
+ *
+ * The path is walked from the root inode of its mount point.  Every
+ * component but the last must exist and be a directory (Enotfound
+ * otherwise); the last one must not exist yet (Eexists otherwise).  The
+ * `rename` flag is handed through to ext4_link unchanged.
+ *
+ * Returns 0 on success, -1 when the path has no mount point or for the
+ * conditions above, otherwise the error of the failing lower-level call.
+ */
+static int ext4_create_hardlink(const char *path,
+				struct ext4_inode_ref *child_ref, bool rename)
+{
+	bool is_goal = false;
+	u32int inode_mode;
+	u32int next_inode;
+
+	int r;
+	int len;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_dir_search_result result;
+	struct ext4_inode_ref ref;
+
+	if (!mp)
+		return -1;
+
+	struct ext4_fs *const fs = &mp->fs;
+	struct ext4_sblock *const sb = &mp->fs.sb;
+
+	/*Skip mount point*/
+	path += strlen(mp->name);
+
+	/*Load root*/
+	r = ext4_fs_get_inode_ref(fs, EXT4_INODE_ROOT_INDEX, &ref);
+	if (r != 0)
+		return r;
+
+	while (1) {
+
+		len = ext4_path_check(path, &is_goal);
+		if (!len) {
+			/*An empty component cannot be linked.*/
+			werrstr(Enotfound);
+			r = -1;
+			break;
+		}
+
+		r = ext4_dir_find_entry(&result, &ref, path, len);
+		if (r != 0) {
+			/*Destroy last result*/
+			ext4_dir_destroy_result(&ref, &result);
+
+			if (r != EXT4_ERR_NOT_FOUND || !is_goal)
+				break;
+
+			/*The name is free: link into the current directory.*/
+			r = ext4_link(mp, &ref, child_ref, path, len, rename);
+			break;
+		} else if (is_goal) {
+			/*Destroy last result*/
+			ext4_dir_destroy_result(&ref, &result);
+			werrstr(Eexists);
+			r = -1;
+			break;
+		}
+
+		/* use the accessor, as ext4_generic_open2 does, rather than
+		 * reading the raw on-disk field */
+		next_inode = ext4_dir_en_get_inode(result.dentry);
+		if (ext4_sb_feature_incom(sb, EXT4_FINCOM_FILETYPE)) {
+			u8int t;
+			t = ext4_dir_en_get_inode_type(sb, result.dentry);
+			inode_mode = ext4_fs_correspond_inode_mode(t);
+		} else {
+			/* named so that it does not shadow the parameter */
+			struct ext4_inode_ref next_ref;
+			r = ext4_fs_get_inode_ref(fs, next_inode, &next_ref);
+			if (r != 0) {
+				/* the search result is still held; release it */
+				ext4_dir_destroy_result(&ref, &result);
+				break;
+			}
+
+			inode_mode = ext4_inode_type(sb, next_ref.inode);
+			ext4_fs_put_inode_ref(&next_ref);
+		}
+
+		r = ext4_dir_destroy_result(&ref, &result);
+		if (r != 0)
+			break;
+
+		if (inode_mode != EXT4_INODE_MODE_DIRECTORY) {
+			werrstr(is_goal ? Eexists : Enotfound);
+			r = -1;
+			break;
+		}
+
+		/* descend into the directory just found */
+		r = ext4_fs_put_inode_ref(&ref);
+		if (r != 0)
+			break;
+
+		r = ext4_fs_get_inode_ref(fs, next_inode, &ref);
+		if (r != 0)
+			break;
+
+		if (is_goal)
+			break;
+
+		path += len + 1;
+	}
+
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&ref);
+		return r;
+	}
+
+	r = ext4_fs_put_inode_ref(&ref);
+	return r;
+}
+
+/*
+ * Drop the directory entry that starts name_off bytes into path from
+ * parent_ref.  When child_ref is a directory the parent's link count is
+ * decremented as well and the parent is marked dirty.
+ *
+ * Returns -1 when path has no mount point, otherwise the result of
+ * ext4_dir_remove_entry.
+ */
+static int ext4_remove_orig_reference(const char *path, u32int name_off,
+				      struct ext4_inode_ref *parent_ref,
+				      struct ext4_inode_ref *child_ref)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	const char *name;
+	bool last;
+	int namelen;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	name = path + name_off;
+	namelen = ext4_path_check(name, &last);
+
+	rc = ext4_dir_remove_entry(parent_ref, name, namelen);
+	if (rc != 0)
+		return rc;
+
+	if (ext4_inode_is_type(&mp->fs.sb, child_ref->inode,
+			       EXT4_INODE_MODE_DIRECTORY)) {
+		ext4_fs_inode_links_count_dec(parent_ref);
+		parent_ref->dirty = true;
+	}
+
+	return rc;
+}
+
+/*
+ * Create hardlink_path as an additional name for the existing path.
+ *
+ * Both paths must resolve to the same, writable mount point, and path must
+ * not name a directory ("is a directory").  The link is made inside a
+ * transaction that is aborted on any failure.
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+int ext4_flink(const char *path, const char *hardlink_path)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_mountpoint *target_mp = ext4_get_mount(hardlink_path);
+	struct ext4_inode_ref child_ref;
+	u32int parent_inode, child_inode;
+	u32int name_off;
+	ext4_file f;
+	int r;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	/* Will that happen? */
+	if (mp != target_mp) {
+		werrstr("mount point must be the same: %s vs %s", path, hardlink_path);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	/* the lookup is only needed for the inode number */
+	r = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, &parent_inode, &name_off);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+	child_inode = f.inode;
+	ext4_fclose(&f);
+
+	ext4_trans_start(mp);
+
+	r = ext4_fs_get_inode_ref(&mp->fs, child_inode, &child_ref);
+	if (r == 0) {
+		if (ext4_inode_is_type(&mp->fs.sb, child_ref.inode, EXT4_INODE_MODE_DIRECTORY)) {
+			/* Creating hardlink for directory is not allowed. */
+			werrstr("is a directory");
+			r = -1;
+		} else {
+			r = ext4_create_hardlink(hardlink_path, &child_ref, false);
+		}
+		ext4_fs_put_inode_ref(&child_ref);
+	}
+
+	if (r == 0)
+		ext4_trans_stop(mp);
+	else
+		ext4_trans_abort(mp);
+
+	EXT4_MP_UNLOCK(mp);
+	return r;
+}
+
+/*
+ * Rename path to new_path.
+ *
+ * The inode is first linked under new_path (ext4_create_hardlink with the
+ * rename flag set) and then the original directory entry is removed; both
+ * steps run in one transaction that is aborted on any failure.
+ *
+ * Both paths must resolve to the same, writable mount point: the new link
+ * is created in whatever mount point new_path resolves to, so a mismatch is
+ * rejected up front in the same way ext4_flink does.
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+int ext4_frename(const char *path, const char *new_path)
+{
+	int r;
+	ext4_file f;
+	u32int name_off;
+	bool parent_loaded = false, child_loaded = false;
+	u32int parent_inode, child_inode;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_mountpoint *target_mp = ext4_get_mount(new_path);
+	struct ext4_inode_ref child_ref, parent_ref;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	/* an inode cannot be linked into a directory of another file system */
+	if (mp != target_mp) {
+		werrstr("mount point must be the same: %s vs %s", path, new_path);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	/* the lookup yields the inode, its parent and the name offset */
+	r = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN,
+				&parent_inode, &name_off);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+
+	child_inode = f.inode;
+	ext4_fclose(&f);
+	ext4_trans_start(mp);
+
+	/*Load parent*/
+	r = ext4_fs_get_inode_ref(&mp->fs, parent_inode, &parent_ref);
+	if (r != 0)
+		goto Finish;
+
+	parent_loaded = true;
+
+	/*Load the inode being renamed.*/
+	r = ext4_fs_get_inode_ref(&mp->fs, child_inode, &child_ref);
+	if (r != 0)
+		goto Finish;
+
+	child_loaded = true;
+
+	r = ext4_create_hardlink(new_path, &child_ref, true);
+	if (r != 0)
+		goto Finish;
+
+	r = ext4_remove_orig_reference(path, name_off, &parent_ref, &child_ref);
+
+Finish:
+	if (parent_loaded)
+		ext4_fs_put_inode_ref(&parent_ref);
+
+	if (child_loaded)
+		ext4_fs_put_inode_ref(&child_ref);
+
+	if (r != 0)
+		ext4_trans_abort(mp);
+	else
+		ext4_trans_stop(mp);
+
+	EXT4_MP_UNLOCK(mp);
+	return r;
+
+}
+
+/****************************************************************************/
+
+/*
+ * Store in *sb a pointer to the superblock of the mount point that
+ * mount_point resolves to.  Returns 0, or -1 when there is no such mount
+ * point (in which case *sb is left untouched).
+ */
+int ext4_get_sblock(const char *mount_point, struct ext4_sblock **sb)
+{
+	struct ext4_mountpoint *mp;
+
+	mp = ext4_get_mount(mount_point);
+	if (mp == nil)
+		return -1;
+
+	*sb = &mp->fs.sb;
+	return 0;
+}
+
+/*
+ * Switch write-back mode of the block cache behind path's mount point on or
+ * off, under the mount point lock.  Returns -1 when path has no mount
+ * point, otherwise the result of ext4_block_cache_write_back.
+ */
+int ext4_cache_write_back(const char *path, bool on)
+{
+	struct ext4_mountpoint *mp;
+	int rc;
+
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+	rc = ext4_block_cache_write_back(mp->fs.bdev, on);
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Flush the block cache behind path's mount point, under the mount point
+ * lock.  Returns -1 when path has no mount point, otherwise the result of
+ * ext4_block_cache_flush.
+ */
+int ext4_cache_flush(const char *path)
+{
+	struct ext4_mountpoint *mp;
+	int rc;
+
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+	rc = ext4_block_cache_flush(mp->fs.bdev);
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Remove the non-directory file named by path.
+ *
+ * The entry is unlinked from its parent directory; when that was the last
+ * link, the file's data is truncated away first and the inode is freed.
+ * Everything runs in a transaction that is aborted on any failure.
+ *
+ * Returns 0 on success.  Returns -1 when path has no mount point, when the
+ * file system is read-only (Erdonlyfs) or when path names a directory
+ * ("is a directory"); otherwise the error of the failing lower-level call.
+ */
+int ext4_fremove(const char *path)
+{
+	ext4_file f;
+	u32int parent_inode;
+	u32int child_inode;
+	u32int name_off;
+	bool is_goal;
+	int r;
+	int len;
+	struct ext4_inode_ref child;
+	struct ext4_inode_ref parent;
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+	r = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, &parent_inode, &name_off);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+
+	child_inode = f.inode;
+	ext4_fclose(&f);
+	ext4_trans_start(mp);
+
+	/*Load parent*/
+	r = ext4_fs_get_inode_ref(&mp->fs, parent_inode, &parent);
+	if (r != 0) {
+		ext4_trans_abort(mp);
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+
+	/*We have file to delete. Load it.*/
+	r = ext4_fs_get_inode_ref(&mp->fs, child_inode, &child);
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&parent);
+		ext4_trans_abort(mp);
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+
+	/* Directories are not removed here; report that instead of
+	 * returning the 0 left over from the successful load above. */
+	if (ext4_inode_type(&mp->fs.sb, child.inode) ==
+	    EXT4_INODE_MODE_DIRECTORY) {
+		werrstr("is a directory");
+		r = -1;
+		goto Finish;
+	}
+
+	/*Link count will be zero, the inode should be freed. */
+	if (ext4_inode_get_links_cnt(child.inode) == 1) {
+		ext4_block_cache_write_back(mp->fs.bdev, 1);
+		r = ext4_trunc_inode(mp, child.index, 0);
+		/* leave write-back mode whether or not truncation worked */
+		ext4_block_cache_write_back(mp->fs.bdev, 0);
+		if (r != 0)
+			goto Finish;
+	}
+
+	/*Set path*/
+	path += name_off;
+
+	len = ext4_path_check(path, &is_goal);
+
+	/*Unlink from parent*/
+	r = ext4_unlink(mp, &parent, &child, path, len);
+	if (r != 0)
+		goto Finish;
+
+	/*Link count is zero, the inode should be freed. */
+	if (!ext4_inode_get_links_cnt(child.inode)) {
+		ext4_inode_set_del_time(child.inode, -1L);
+
+		r = ext4_fs_free_inode(&child);
+	}
+
+Finish:
+	ext4_fs_put_inode_ref(&child);
+	ext4_fs_put_inode_ref(&parent);
+
+	if (r != 0)
+		ext4_trans_abort(mp);
+	else
+		ext4_trans_stop(mp);
+
+	EXT4_MP_UNLOCK(mp);
+	return r;
+}
+
+/*
+ * Open the regular file at path with flags given as a string, filling in
+ * *file.  The lookup runs under the mount point lock with the block cache
+ * in write-back mode.  Returns -1 when path has no mount point, otherwise
+ * the result of ext4_generic_open.
+ */
+int ext4_fopen(ext4_file *file, const char *path, const char *flags)
+{
+	struct ext4_mountpoint *mp;
+	int rc;
+
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+
+	rc = ext4_generic_open(file, path, flags, true, 0, 0);
+
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Open the regular file at path with numeric flags, filling in *file.
+ *
+ * Runs under the mount point lock with the block cache in write-back mode.
+ * When flags contain O_CREAT the lookup is wrapped in a transaction that is
+ * stopped on success and aborted on failure.  Returns -1 when path has no
+ * mount point, otherwise the result of ext4_generic_open2.
+ */
+int ext4_fopen2(ext4_file *file, const char *path, int flags)
+{
+	struct ext4_mountpoint *mp;
+	bool creating;
+	int rc;
+
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+
+	creating = (flags & O_CREAT) != 0;
+
+	EXT4_MP_LOCK(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+
+	if (creating)
+		ext4_trans_start(mp);
+
+	rc = ext4_generic_open2(file, path, flags, EXT4_DE_REG_FILE, nil, nil);
+
+	if (creating) {
+		if (rc != 0)
+			ext4_trans_abort(mp);
+		else
+			ext4_trans_stop(mp);
+	}
+
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Reset every field of an open file descriptor.  The descriptor must be
+ * non-nil and currently bound to a mount point.  Always returns 0.
+ */
+int ext4_fclose(ext4_file *file)
+{
+	assert(file && file->mp);
+
+	/* position and size first, identity last */
+	file->fsize = 0;
+	file->fpos = 0;
+	file->inode = 0;
+	file->flags = 0;
+	file->mp = 0;
+
+	return 0;
+}
+
+/*
+ * Shrink the file behind `file` to `size` bytes.  The caller is responsible
+ * for the mount point lock; this function neither takes nor releases it.
+ *
+ * file->fsize is refreshed from the inode first.  Truncating to the current
+ * size is a successful no-op (ext4_fsymlink_set relies on this for freshly
+ * created, empty files); growing is refused with "space preallocation not
+ * supported".  On success file->fsize is updated and file->fpos is clamped
+ * to the new size.
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+static int ext4_ftruncate_no_lock(ext4_file *file, u64int size)
+{
+	struct ext4_inode_ref ref;
+	int r;
+
+	r = ext4_fs_get_inode_ref(&file->mp->fs, file->inode, &ref);
+	if (r != 0)
+		return r;	/* the lock belongs to the caller: do not drop it */
+
+	/*Sync file size*/
+	file->fsize = ext4_inode_get_size(&file->mp->fs.sb, ref.inode);
+	if (file->fsize == size) {
+		/* already the requested size */
+		r = 0;
+		goto Finish;
+	}
+	if (file->fsize < size) {
+		werrstr("space preallocation not supported");
+		r = -1;
+		goto Finish;
+	}
+
+	/*Start write back cache mode.*/
+	r = ext4_block_cache_write_back(file->mp->fs.bdev, 1);
+	if (r != 0)
+		goto Finish;
+
+	r = ext4_trunc_inode(file->mp, ref.index, size);
+
+	/*Stop write back cache mode, also when truncation failed*/
+	ext4_block_cache_write_back(file->mp->fs.bdev, 0);
+
+	if (r != 0)
+		goto Finish;
+
+	file->fsize = size;
+	if (file->fpos > size)
+		file->fpos = size;
+
+Finish:
+	ext4_fs_put_inode_ref(&ref);
+	return r;
+
+}
+
+/*
+ * Locked, transactional wrapper around ext4_ftruncate_no_lock.
+ *
+ * Refuses with Erdonlyfs on a read-only file system and with Eperm when the
+ * O_RDONLY bit test on the descriptor's flags is non-zero.
+ * NOTE(review): if O_RDONLY is defined as 0 here, that test can never
+ * fire - confirm against the flag definitions.
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+int ext4_ftruncate(ext4_file *f, u64int size)
+{
+	int rc;
+
+	assert(f && f->mp);
+
+	if (f->mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	if (f->flags & O_RDONLY) {
+		werrstr(Eperm);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(f->mp);
+	ext4_trans_start(f->mp);
+
+	rc = ext4_ftruncate_no_lock(f, size);
+	if (rc != 0)
+		ext4_trans_abort(f->mp);
+	else
+		ext4_trans_stop(f->mp);
+
+	EXT4_MP_UNLOCK(f->mp);
+	return rc;
+}
+
+/*
+ * Read up to `size` bytes from the current position of `file` into `buf`.
+ *
+ * file  open descriptor; refused with Eperm when O_WRONLY is set.
+ * buf   destination buffer of at least `size` bytes.
+ * size  number of bytes wanted; clamped to what is left before end of file.
+ * rcnt  optional; receives the number of bytes actually read.  It is set to
+ *       0 before anything else can return success, so it is valid even for
+ *       a zero-length request.
+ *
+ * The request is served in three parts: an unaligned head up to the next
+ * block boundary, runs of whole blocks read directly, and a partial tail
+ * block.  A block number of 0 stands for an unwritten range and reads as
+ * zeros in the head and tail parts.  Short symlinks whose target is stored
+ * in the inode itself are copied from there; that path does not advance
+ * file->fpos.
+ *
+ * NOTE(review): the whole-block loop treats fblock_start == 0 as "no run
+ * started yet", so unwritten blocks in the middle of a read do not appear
+ * to be zero-filled - confirm.
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+int ext4_fread(ext4_file *file, void *buf, usize size, usize *rcnt)
+{
+	u32int unalg;
+	u32int iblock_idx;
+	u32int iblock_last;
+	u32int block_size;
+
+	ext4_fsblk_t fblock;
+	ext4_fsblk_t fblock_start;
+	u32int fblock_count;
+
+	u8int *u8_buf = buf;
+	int r;
+	struct ext4_inode_ref ref;
+
+	assert(file && file->mp);
+
+	if (file->flags & O_WRONLY) {
+		werrstr(Eperm);
+		return -1;
+	}
+
+	/* must precede the zero-length shortcut below */
+	if (rcnt)
+		*rcnt = 0;
+
+	if (!size)
+		return 0;
+
+	EXT4_MP_LOCK(file->mp);
+
+	struct ext4_fs *const fs = &file->mp->fs;
+	struct ext4_sblock *const sb = &file->mp->fs.sb;
+
+	r = ext4_fs_get_inode_ref(fs, file->inode, &ref);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(file->mp);
+		return r;
+	}
+
+	/*Sync file size*/
+	file->fsize = ext4_inode_get_size(sb, ref.inode);
+
+	block_size = ext4_sb_get_block_size(sb);
+	size = ((u64int)size > (file->fsize - file->fpos))
+		? ((usize)(file->fsize - file->fpos)) : size;
+
+	iblock_idx = (u32int)((file->fpos) / block_size);
+	iblock_last = (u32int)((file->fpos + size) / block_size);
+	unalg = (file->fpos) % block_size;
+
+	/*If the size of symlink is smaller than 60 bytes*/
+	bool softlink;
+	softlink = ext4_inode_is_type(sb, ref.inode, EXT4_INODE_MODE_SOFTLINK);
+	if (softlink && file->fsize < sizeof(ref.inode->blocks)
+		     && !ext4_inode_get_blocks_count(sb, ref.inode)) {
+
+		/* the target lives in the inode's block array */
+		char *content = (char *)ref.inode->blocks;
+		if (file->fpos < file->fsize) {
+			usize len = size;
+			if (unalg + size > (u32int)file->fsize)
+				len = (u32int)file->fsize - unalg;
+			memcpy(buf, content + unalg, len);
+			if (rcnt)
+				*rcnt = len;
+
+		}
+
+		r = 0;
+		goto Finish;
+	}
+
+	/* head: from fpos up to the next block boundary */
+	if (unalg) {
+		usize len = size;
+		if (size > (block_size - unalg))
+			len = block_size - unalg;
+
+		r = ext4_fs_get_inode_dblk_idx(&ref, iblock_idx, &fblock, true);
+		if (r != 0)
+			goto Finish;
+
+		/* Do we get an unwritten range? */
+		if (fblock != 0) {
+			u64int off = fblock * block_size + unalg;
+			r = ext4_block_readbytes(file->mp->fs.bdev, off, u8_buf, len);
+			if (r != 0)
+				goto Finish;
+
+		} else {
+			/* Yes, we do. */
+			memset(u8_buf, 0, len);
+		}
+
+		u8_buf += len;
+		size -= len;
+		file->fpos += len;
+
+		if (rcnt)
+			*rcnt += len;
+
+		iblock_idx++;
+	}
+
+	/* body: whole blocks, gathered into physically contiguous runs */
+	fblock_start = 0;
+	fblock_count = 0;
+	while (size >= block_size) {
+		while (iblock_idx < iblock_last) {
+			r = ext4_fs_get_inode_dblk_idx(&ref, iblock_idx,
+						       &fblock, true);
+			if (r != 0)
+				goto Finish;
+
+			iblock_idx++;
+
+			if (!fblock_start)
+				fblock_start = fblock;
+
+			/* run broken: read what was gathered so far */
+			if ((fblock_start + fblock_count) != fblock)
+				break;
+
+			fblock_count++;
+		}
+
+		r = ext4_blocks_get_direct(file->mp->fs.bdev, u8_buf, fblock_start,
+					   fblock_count);
+		if (r != 0)
+			goto Finish;
+
+		size -= block_size * fblock_count;
+		u8_buf += block_size * fblock_count;
+		file->fpos += block_size * fblock_count;
+
+		if (rcnt)
+			*rcnt += block_size * fblock_count;
+
+		/* the block that broke the run starts the next one */
+		fblock_start = fblock;
+		fblock_count = 1;
+	}
+
+	/* tail: the remaining part of the last block */
+	if (size) {
+		r = ext4_fs_get_inode_dblk_idx(&ref, iblock_idx, &fblock, true);
+		if (r != 0)
+			goto Finish;
+
+		if (fblock != 0) {
+			u64int off = fblock * block_size;
+			r = ext4_block_readbytes(file->mp->fs.bdev, off, u8_buf, size);
+			if (r != 0)
+				goto Finish;
+		} else {
+			/* unwritten range: zeros, exactly as in the head part */
+			memset(u8_buf, 0, size);
+		}
+
+		file->fpos += size;
+
+		if (rcnt)
+			*rcnt += size;
+	}
+
+Finish:
+	ext4_fs_put_inode_ref(&ref);
+	EXT4_MP_UNLOCK(file->mp);
+	return r;
+}
+
+/*
+ * Write `size` bytes from `buf` at the current position of `file`.
+ *
+ * file  open descriptor; refused with Erdonlyfs on a read-only file system
+ *       and with Eperm when the O_RDONLY bit test is non-zero.
+ * wcnt  optional; receives the number of bytes actually written.
+ *
+ * The request is served in three parts: an unaligned head up to the next
+ * block boundary, runs of whole blocks written directly (with the block
+ * cache in write-back mode), and a partial tail block.  Blocks beyond the
+ * current end of the file are appended.  The inode size is raised when the
+ * position ends up past the old size.  Everything runs in a transaction.
+ *
+ * When no further block can be appended the call stops early: *wcnt holds
+ * the short count, the size is still updated and the call returns success,
+ * as it always did.  Any other failure is returned to the caller and aborts
+ * the transaction; the final ext4_fs_put_inode_ref no longer hides it.
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+int ext4_fwrite(ext4_file *file, const void *buf, usize size, usize *wcnt)
+{
+	u32int unalg;
+	u32int iblk_idx;
+	u32int iblock_last;
+	u32int ifile_blocks;
+	u32int block_size;
+
+	u32int fblock_count;
+	ext4_fsblk_t fblk;
+	ext4_fsblk_t fblock_start;
+
+	struct ext4_inode_ref ref;
+	const u8int *u8_buf = buf;
+	int r, rr = 0;
+	bool wb = false;	/* write-back mode enabled and not yet disabled */
+
+	assert(file && file->mp);
+
+	if (file->mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	if (file->flags & O_RDONLY) {
+		werrstr(Eperm);
+		return -1;
+	}
+
+	if (!size)
+		return 0;
+
+	EXT4_MP_LOCK(file->mp);
+	ext4_trans_start(file->mp);
+
+	struct ext4_fs *const fs = &file->mp->fs;
+	struct ext4_sblock *const sb = &file->mp->fs.sb;
+
+	if (wcnt)
+		*wcnt = 0;
+
+	r = ext4_fs_get_inode_ref(fs, file->inode, &ref);
+	if (r != 0) {
+		ext4_trans_abort(file->mp);
+		EXT4_MP_UNLOCK(file->mp);
+		return r;
+	}
+
+	/*Sync file size*/
+	file->fsize = ext4_inode_get_size(sb, ref.inode);
+	block_size = ext4_sb_get_block_size(sb);
+
+	iblock_last = (u32int)((file->fpos + size) / block_size);
+	iblk_idx = (u32int)(file->fpos / block_size);
+	ifile_blocks = (u32int)((file->fsize + block_size - 1) / block_size);
+
+	unalg = (file->fpos) % block_size;
+
+	/* head: from fpos up to the next block boundary */
+	if (unalg) {
+		usize len = size;
+		u64int off;
+		if (size > (block_size - unalg))
+			len = block_size - unalg;
+
+		r = ext4_fs_init_inode_dblk_idx(&ref, iblk_idx, &fblk);
+		if (r != 0)
+			goto Finish;
+
+		off = fblk * block_size + unalg;
+		r = ext4_block_writebytes(file->mp->fs.bdev, off, u8_buf, len);
+		if (r != 0)
+			goto Finish;
+
+		u8_buf += len;
+		size -= len;
+		file->fpos += len;
+
+		if (wcnt)
+			*wcnt += len;
+
+		iblk_idx++;
+	}
+
+	/*Start write back cache mode.*/
+	r = ext4_block_cache_write_back(file->mp->fs.bdev, 1);
+	if (r != 0)
+		goto Finish;
+	wb = true;
+
+	/* body: whole blocks, gathered into physically contiguous runs */
+	fblock_start = 0;
+	fblock_count = 0;
+	while (size >= block_size) {
+
+		while (iblk_idx < iblock_last) {
+			if (iblk_idx < ifile_blocks) {
+				r = ext4_fs_init_inode_dblk_idx(&ref, iblk_idx,
+								&fblk);
+				if (r != 0)
+					goto Finish;
+			} else {
+				rr = ext4_fs_append_inode_dblk(&ref, &fblk,
+							       &iblk_idx);
+				if (rr != 0) {
+					/* Unable to append more blocks. But
+					 * some block might be allocated already
+					 * */
+					break;
+				}
+			}
+
+			iblk_idx++;
+
+			if (!fblock_start) {
+				fblock_start = fblk;
+			}
+
+			/* run broken: write what was gathered so far */
+			if ((fblock_start + fblock_count) != fblk)
+				break;
+
+			fblock_count++;
+		}
+
+		r = ext4_blocks_set_direct(file->mp->fs.bdev, u8_buf, fblock_start,
+					   fblock_count);
+		if (r != 0)
+			break;
+
+		size -= block_size * fblock_count;
+		u8_buf += block_size * fblock_count;
+		file->fpos += block_size * fblock_count;
+
+		if (wcnt)
+			*wcnt += block_size * fblock_count;
+
+		/* the block that broke the run starts the next one */
+		fblock_start = fblk;
+		fblock_count = 1;
+
+		if (rr != 0) {
+			/*ext4_fs_append_inode_block has failed and no
+			 * more blocks might be written. But node size
+			 * should be updated.*/
+			/* FIXME wth is happening here exactly? */
+			//r = rr;
+			goto out_fsize;
+		}
+	}
+
+	/*Stop write back cache mode*/
+	ext4_block_cache_write_back(file->mp->fs.bdev, 0);
+	wb = false;
+
+	if (r != 0)
+		goto Finish;
+
+	/* tail: the remaining part of the last block */
+	if (size) {
+		u64int off;
+		if (iblk_idx < ifile_blocks) {
+			r = ext4_fs_init_inode_dblk_idx(&ref, iblk_idx, &fblk);
+			if (r != 0)
+				goto Finish;
+		} else {
+			r = ext4_fs_append_inode_dblk(&ref, &fblk, &iblk_idx);
+			if (r != 0)
+				/*Node size should be updated.*/
+				goto out_fsize;
+		}
+
+		off = fblk * block_size;
+		r = ext4_block_writebytes(file->mp->fs.bdev, off, u8_buf, size);
+		if (r != 0)
+			goto Finish;
+
+		file->fpos += size;
+
+		if (wcnt)
+			*wcnt += size;
+	}
+
+out_fsize:
+	/* Reached on success and when appending a block failed; the latter
+	 * is reported as a short write through *wcnt, not as an error. */
+	r = 0;
+	if (file->fpos > file->fsize) {
+		file->fsize = file->fpos;
+		ext4_inode_set_size(ref.inode, file->fsize);
+		ref.dirty = true;
+	}
+
+Finish:
+	/* early exits from the whole-block loop skip the regular disable */
+	if (wb)
+		ext4_block_cache_write_back(file->mp->fs.bdev, 0);
+
+	/* keep the first error; only a clean run takes the put's result */
+	rr = ext4_fs_put_inode_ref(&ref);
+	if (r == 0)
+		r = rr;
+
+	if (r != 0)
+		ext4_trans_abort(file->mp);
+	else
+		ext4_trans_stop(file->mp);
+
+	EXT4_MP_UNLOCK(file->mp);
+	return r;
+}
+
+/*
+ * Move the position of `file`.
+ *
+ * origin 0: absolute; offset must lie within [0, fsize].
+ * origin 1: relative to the current position; the result must stay within
+ *           [0, fsize].
+ * origin 2: measured back from the end; offset must lie within [0, fsize]
+ *           and the new position is fsize - offset.
+ *
+ * Returns 0 on success; any other origin or an out-of-range offset yields
+ * -1 with Einval and leaves the position untouched.
+ */
+int ext4_fseek(ext4_file *file, s64int offset, u32int origin)
+{
+	if (origin == 0) {
+		if (offset >= 0 && (u64int)offset <= file->fsize) {
+			file->fpos = offset;
+			return 0;
+		}
+	} else if (origin == 1) {
+		bool before_start = offset < 0 &&
+				    (u64int)(-offset) > file->fpos;
+		bool past_end = offset > 0 &&
+				(u64int)offset > (file->fsize - file->fpos);
+
+		if (!before_start && !past_end) {
+			file->fpos += offset;
+			return 0;
+		}
+	} else if (origin == 2) {
+		if (offset >= 0 && (u64int)offset <= file->fsize) {
+			file->fpos = file->fsize - offset;
+			return 0;
+		}
+	}
+
+	werrstr(Einval);
+	return -1;
+}
+
+/* Current position of `file`, as stored in the descriptor. */
+u64int ext4_ftell(ext4_file *file)
+{
+	u64int pos;
+
+	pos = file->fpos;
+	return pos;
+}
+
+/* Size of `file`, as cached in the descriptor. */
+u64int ext4_fsize(ext4_file *file)
+{
+	u64int len;
+
+	len = file->fsize;
+	return len;
+}
+
+
+/*
+ * Look path up, start a transaction on mp and load the inode found into
+ * inode_ref.
+ *
+ * No transaction is started when the lookup fails; a transaction that was
+ * started is aborted when loading the inode fails.  On success the
+ * transaction is left open for the caller.
+ */
+static int ext4_trans_get_inode_ref(const char *path,
+				    struct ext4_mountpoint *mp,
+				    struct ext4_inode_ref *inode_ref)
+{
+	ext4_file f;
+	int rc;
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0) {
+		ext4_trans_start(mp);
+
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, inode_ref);
+		if (rc != 0)
+			ext4_trans_abort(mp);
+	}
+
+	return rc;
+}
+
+/*
+ * Release inode_ref and finish the transaction on mp: stopped when the
+ * release succeeded, aborted otherwise.  Returns the result of the release.
+ */
+static int ext4_trans_put_inode_ref(struct ext4_mountpoint *mp,
+				    struct ext4_inode_ref *inode_ref)
+{
+	int rc = ext4_fs_put_inode_ref(inode_ref);
+
+	if (rc == 0)
+		ext4_trans_stop(mp);
+	else
+		ext4_trans_abort(mp);
+
+	return rc;
+}
+
+
+/*
+ * Look path up and hand back its inode number and a copy of its inode.
+ *
+ * ret_ino  optional; receives the inode number.
+ * inode    optional; receives sizeof(struct ext4_inode) bytes copied from
+ *          the loaded inode.
+ *
+ * Returns -1 when path has no mount point, otherwise the error of the
+ * lookup or of loading the inode, or 0.
+ */
+int ext4_raw_inode_fill(const char *path, u32int *ret_ino,
+			struct ext4_inode *inode)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0) {
+		/*Load the inode that was found*/
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+	}
+
+	if (rc == 0) {
+		if (ret_ino)
+			*ret_ino = f.inode;
+		if (inode)
+			memcpy(inode, inode_ref.inode, sizeof(struct ext4_inode));
+
+		ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Check that path can be looked up as an entry of the given type, under the
+ * mount point lock.  Returns -1 when path has no mount point, otherwise the
+ * result of ext4_generic_open2 (0 when the entry was found).
+ */
+int ext4_inode_exist(const char *path, int type)
+{
+	struct ext4_mountpoint *mp;
+	ext4_file f;
+	int rc;
+
+	mp = ext4_get_mount(path);
+	if (mp == nil)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+	rc = ext4_generic_open2(&f, path, O_RDONLY, type, nil, nil);
+	EXT4_MP_UNLOCK(mp);
+
+	return rc;
+}
+
+/*
+ * Replace the low 12 bits of the mode of the inode at path with the low 12
+ * bits of `mode`; the remaining mode bits are kept.
+ *
+ * Returns -1 when path has no mount point or the file system is read-only
+ * (Erdonlyfs), otherwise the result of the transactional inode update.
+ */
+int ext4_mode_set(const char *path, u32int mode)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	u32int merged;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_trans_get_inode_ref(path, mp, &inode_ref);
+	if (rc == 0) {
+		merged = ext4_inode_get_mode(&mp->fs.sb, inode_ref.inode);
+		merged = (merged & ~0xFFF) | (mode & 0xFFF);
+		ext4_inode_set_mode(&mp->fs.sb, inode_ref.inode, merged);
+
+		inode_ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Set the user and group id of the inode at path.
+ *
+ * Returns -1 when path has no mount point or the file system is read-only
+ * (Erdonlyfs), otherwise the result of the transactional inode update.
+ */
+int ext4_owner_set(const char *path, u32int uid, u32int gid)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_trans_get_inode_ref(path, mp, &inode_ref);
+	if (rc == 0) {
+		ext4_inode_set_uid(inode_ref.inode, uid);
+		ext4_inode_set_gid(inode_ref.inode, gid);
+
+		inode_ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Store the mode of the inode at path in *mode.
+ *
+ * Returns -1 when path has no mount point, otherwise the error of the
+ * lookup or of loading the inode, or the result of releasing the inode.
+ */
+int ext4_mode_get(const char *path, u32int *mode)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+
+	if (rc == 0) {
+		*mode = ext4_inode_get_mode(&mp->fs.sb, inode_ref.inode);
+		rc = ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Store the user and group id of the inode at path in *uid and *gid.
+ *
+ * Returns -1 when path has no mount point, otherwise the error of the
+ * lookup or of loading the inode, or the result of releasing the inode.
+ */
+int ext4_owner_get(const char *path, u32int *uid, u32int *gid)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+
+	if (rc == 0) {
+		*uid = ext4_inode_get_uid(inode_ref.inode);
+		*gid = ext4_inode_get_gid(inode_ref.inode);
+		rc = ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Set the access time of the inode at path.
+ *
+ * Returns -1 when path has no mount point or the file system is read-only
+ * (Erdonlyfs), otherwise the result of the transactional inode update.
+ */
+int ext4_atime_set(const char *path, u32int atime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_trans_get_inode_ref(path, mp, &inode_ref);
+	if (rc == 0) {
+		ext4_inode_set_access_time(inode_ref.inode, atime);
+		inode_ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Set the modification time of the inode at path.
+ *
+ * Returns -1 when path has no mount point or the file system is read-only
+ * (Erdonlyfs), otherwise the result of the transactional inode update.
+ */
+int ext4_mtime_set(const char *path, u32int mtime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_trans_get_inode_ref(path, mp, &inode_ref);
+	if (rc == 0) {
+		ext4_inode_set_modif_time(inode_ref.inode, mtime);
+		inode_ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Set the inode change time of the inode at path.
+ *
+ * Returns -1 when path has no mount point or the file system is read-only
+ * (Erdonlyfs), otherwise the result of the transactional inode update.
+ */
+int ext4_ctime_set(const char *path, u32int ctime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_trans_get_inode_ref(path, mp, &inode_ref);
+	if (rc == 0) {
+		ext4_inode_set_change_inode_time(inode_ref.inode, ctime);
+		inode_ref.dirty = true;
+		rc = ext4_trans_put_inode_ref(mp, &inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Store the access time of the inode at path in *atime.
+ *
+ * Returns -1 when path has no mount point, otherwise the error of the
+ * lookup or of loading the inode, or the result of releasing the inode.
+ */
+int ext4_atime_get(const char *path, u32int *atime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+
+	if (rc == 0) {
+		*atime = ext4_inode_get_access_time(inode_ref.inode);
+		rc = ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Store the modification time of the inode at path in *mtime.
+ *
+ * Returns -1 when path has no mount point, otherwise the error of the
+ * lookup or of loading the inode, or the result of releasing the inode.
+ */
+int ext4_mtime_get(const char *path, u32int *mtime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+
+	if (rc == 0) {
+		*mtime = ext4_inode_get_modif_time(inode_ref.inode);
+		rc = ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Store the inode change time of the inode at path in *ctime.
+ *
+ * Returns -1 when path has no mount point, otherwise the error of the
+ * lookup or of loading the inode, or the result of releasing the inode.
+ */
+int ext4_ctime_get(const char *path, u32int *ctime)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref inode_ref;
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_UNKNOWN, nil, nil);
+	if (rc == 0)
+		rc = ext4_fs_get_inode_ref(&mp->fs, f.inode, &inode_ref);
+
+	if (rc == 0) {
+		*ctime = ext4_inode_get_change_inode_time(inode_ref.inode);
+		rc = ext4_fs_put_inode_ref(&inode_ref);
+	}
+
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Store `size` bytes from `buf` as the target of the symlink behind f.
+ *
+ * A zero-length target is a successful no-op; a target longer than one
+ * block is refused ("invalid block size").  The inode is truncated to zero
+ * first.  A target shorter than the inode's block array is stored in that
+ * array (and EXT4_INODE_FLAG_EXTENTS is cleared); anything longer goes into
+ * one freshly appended data block.  On success the inode size, f->fsize and
+ * f->fpos are brought in line with the new length.
+ *
+ * Write-back mode of the block cache is enabled around the update and is
+ * disabled again on every path that enabled it, including failures.
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+static int ext4_fsymlink_set(ext4_file *f, const void *buf, u32int size)
+{
+	struct ext4_inode_ref ref;
+	u32int sblock;
+	ext4_fsblk_t fblock;
+	u32int block_size;
+	int r;
+
+	assert(f && f->mp);
+
+	if (!size)
+		return 0;
+
+	r = ext4_fs_get_inode_ref(&f->mp->fs, f->inode, &ref);
+	if (r != 0)
+		return r;
+
+	/* the target has to fit into a single block */
+	block_size = ext4_sb_get_block_size(&f->mp->fs.sb);
+	if (size > block_size) {
+		werrstr("invalid block size");
+		r = -1;
+		goto Finish;
+	}
+	r = ext4_ftruncate_no_lock(f, 0);
+	if (r != 0)
+		goto Finish;
+
+	/*Start write back cache mode.*/
+	r = ext4_block_cache_write_back(f->mp->fs.bdev, 1);
+	if (r != 0)
+		goto Finish;
+
+	/*If the size of symlink is smaller than 60 bytes*/
+	if (size < sizeof(ref.inode->blocks)) {
+		memset(ref.inode->blocks, 0, sizeof(ref.inode->blocks));
+		memcpy(ref.inode->blocks, buf, size);
+		ext4_inode_clear_flag(ref.inode, EXT4_INODE_FLAG_EXTENTS);
+	} else {
+		ext4_fs_inode_blocks_init(&f->mp->fs, &ref);
+		r = ext4_fs_append_inode_dblk(&ref, &fblock, &sblock);
+		if (r == 0) {
+			u64int off = fblock * block_size;
+			r = ext4_block_writebytes(f->mp->fs.bdev, off, buf, size);
+		}
+	}
+
+	/*Stop write back cache mode, also when the write above failed*/
+	ext4_block_cache_write_back(f->mp->fs.bdev, 0);
+
+	if (r != 0)
+		goto Finish;
+
+	ext4_inode_set_size(ref.inode, size);
+	ref.dirty = true;
+
+	f->fsize = size;
+	if (f->fpos > size)
+		f->fpos = size;
+
+Finish:
+	ext4_fs_put_inode_ref(&ref);
+	return r;
+}
+
+/*
+ * Create a symlink at path whose content is the string `target`.
+ *
+ * Runs under the mount point lock, with the block cache in write-back mode
+ * and inside a transaction that is aborted on any failure.
+ *
+ * Returns -1 when path has no mount point or the file system is read-only
+ * (Erdonlyfs), otherwise the error of creating the entry or of storing the
+ * target, or 0.
+ */
+int ext4_fsymlink(const char *target, const char *path)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	ext4_file f;
+	int rc;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+	ext4_trans_start(mp);
+
+	rc = ext4_generic_open2(&f, path, O_RDWR | O_CREAT, EXT4_DE_SYMLINK, nil, nil);
+	if (rc == 0) {
+		/* the descriptor is closed whatever the outcome of the write */
+		rc = ext4_fsymlink_set(&f, target, strlen(target));
+		ext4_fclose(&f);
+	}
+
+	if (rc == 0)
+		ext4_trans_stop(mp);
+	else
+		ext4_trans_abort(mp);
+
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Read the target of the symlink at path into buf.
+ *
+ * buf      destination, must not be nil.
+ * bufsize  capacity of buf; at most that many bytes are read.
+ * rcnt     handed to ext4_fread, which reports the byte count through it.
+ *
+ * Runs under the mount point lock with the block cache in write-back mode.
+ * NOTE(review): ext4_fread takes the same lock again - presumably the lock
+ * is recursive; confirm.
+ *
+ * Returns -1 when path has no mount point, otherwise the error of the
+ * lookup or the result of ext4_fread.
+ */
+int ext4_readlink(const char *path, char *buf, usize bufsize, usize *rcnt)
+{
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	ext4_file f;
+	int rc;
+
+	assert(buf != nil);
+
+	if (!mp)
+		return -1;
+
+	EXT4_MP_LOCK(mp);
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+
+	rc = ext4_generic_open2(&f, path, O_RDONLY, EXT4_DE_SYMLINK, nil, nil);
+	if (rc == 0) {
+		rc = ext4_fread(&f, buf, bufsize, rcnt);
+		ext4_fclose(&f);
+	}
+
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+	return rc;
+}
+
+/*
+ * Record the device number `dev` in the inode behind the open file `f`.
+ *
+ * The inode size is forced to 0 and the cached size/position in `f` are
+ * reset to match.  Returns the result of acquiring or releasing the inode
+ * reference.
+ */
+static int ext4_mknod_set(ext4_file *f, u32int dev)
+{
+	struct ext4_inode_ref iref;
+	int rc;
+
+	assert(f && f->mp);
+
+	rc = ext4_fs_get_inode_ref(&f->mp->fs, f->inode, &iref);
+	if (rc != 0)
+		return rc;
+
+	ext4_inode_set_dev(iref.inode, dev);
+	ext4_inode_set_size(iref.inode, 0);
+	iref.dirty = true;
+
+	f->fsize = 0;
+	f->fpos = 0;
+
+	return ext4_fs_put_inode_ref(&iref);
+}
+
+/*
+ * Create a special file at `path`.
+ *
+ * `filetype` must be one of EXT4_DE_CHRDEV, EXT4_DE_BLKDEV, EXT4_DE_FIFO or
+ * EXT4_DE_SOCK; anything else (regular file, directory, symlink, unknown or
+ * a bogus value) fails with errstr Einval.  `dev` is stored only for
+ * character and block devices.
+ *
+ * Returns 0 on success; -1 if no mountpoint matches, the file system is
+ * read-only (Erdonlyfs) or the type is rejected; otherwise the failing
+ * helper's result.  The transaction is aborted on any failure after it has
+ * been started.
+ */
+int ext4_mknod(const char *path, int filetype, u32int dev)
+{
+	struct ext4_mountpoint *mnt = ext4_get_mount(path);
+	ext4_file node;
+	bool is_device;
+	int rc;
+
+	if (mnt == nil)
+		return -1;
+
+	if (mnt->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	/* Only the four special file types are acceptable. */
+	is_device = filetype == EXT4_DE_CHRDEV || filetype == EXT4_DE_BLKDEV;
+	if (!is_device &&
+	    filetype != EXT4_DE_FIFO &&
+	    filetype != EXT4_DE_SOCK) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mnt);
+	ext4_block_cache_write_back(mnt->fs.bdev, 1);
+	ext4_trans_start(mnt);
+
+	rc = ext4_generic_open2(&node, path, O_RDWR | O_CREAT, filetype, nil, nil);
+	if (rc == 0) {
+		if (is_device)
+			rc = ext4_mknod_set(&node, dev);
+		ext4_fclose(&node);
+	}
+
+	if (rc == 0)
+		ext4_trans_stop(mnt);
+	else
+		ext4_trans_abort(mnt);
+
+	ext4_block_cache_write_back(mnt->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mnt);
+	return rc;
+}
+
+/*********************************DIRECTORY OPERATION************************/
+
+/*
+ * Remove the directory at `path` together with everything below it.
+ *
+ * The tree is walked iteratively: the current directory is scanned entry by
+ * entry; an entry that itself has children is descended into (depth++),
+ * every other entry is truncated, unlinked and its inode freed.  When a
+ * directory has been scanned to its end the walk climbs back to the inode
+ * remembered from its ".." entry.  Once depth reaches 0 the directory named
+ * by `path` is unlinked from its parent.
+ *
+ * Each scan step and the final unlink run in their own transaction, aborted
+ * on error.
+ *
+ * Returns 0 on success; -1 if no mountpoint matches `path` or the file
+ * system is read-only (errstr Erdonlyfs); otherwise the failing helper's
+ * result.
+ */
+int ext4_dir_rm(const char *path)
+{
+	int r;
+	int len;
+	ext4_file f;
+
+	struct ext4_mountpoint *mp = ext4_get_mount(path);
+	struct ext4_inode_ref act;
+	struct ext4_inode_ref child;
+	struct ext4_dir_iter it;
+
+	u32int name_off;
+	u32int inode_up;
+	u32int inode_current;
+	u32int depth = 1;
+
+	bool has_children;
+	bool is_goal;
+	bool dir_end;
+
+	if (!mp)
+		return -1;
+
+	if (mp->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mp);
+
+	struct ext4_fs *const fs = &mp->fs;
+
+	/* Check that the directory exists; also yields parent inode and name offset. */
+	r = ext4_generic_open(&f, path, "r", false, &inode_up, &name_off);
+	if (r != 0) {
+		EXT4_MP_UNLOCK(mp);
+		return r;
+	}
+
+	path += name_off;
+	len = ext4_path_check(path, &is_goal);
+	inode_current = f.inode;
+
+	ext4_block_cache_write_back(mp->fs.bdev, 1);
+
+	do {
+
+		/* Every (re)visited directory is scanned from its start. */
+		u64int act_curr_pos = 0;
+		has_children = false;
+		dir_end = false;
+
+		while (r == 0 && !has_children && !dir_end) {
+
+			/* Load directory node. */
+			r = ext4_fs_get_inode_ref(fs, inode_current, &act);
+			if (r != 0) {
+				break;
+			}
+
+			/* Initialize iterator. */
+			r = ext4_dir_iterator_init(&it, &act, act_curr_pos);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&act);
+				break;
+			}
+
+			if (!it.curr) {
+				dir_end = true;
+				goto End;
+			}
+
+			ext4_trans_start(mp);
+
+			/* Get up directory inode when ".." entry */
+			if ((it.curr->name_len == 2) &&
+			    ext4_is_dots(it.curr->name, it.curr->name_len)) {
+				inode_up = ext4_dir_en_get_inode(it.curr);
+			}
+
+			/* If directory or file entry, but not "." ".." entry */
+			if (!ext4_is_dots(it.curr->name, it.curr->name_len)) {
+
+				/* Get child inode reference to unlink
+				 * directory/file. */
+				u32int cinode;
+				u32int inode_type;
+				cinode = ext4_dir_en_get_inode(it.curr);
+				r = ext4_fs_get_inode_ref(fs, cinode, &child);
+				if (r != 0)
+					goto End;
+
+				/* If directory with no leaf children */
+				r = ext4_has_children(&has_children, &child);
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+
+				if (has_children) {
+					/* Has directory children. Go into this
+					 * directory. */
+					inode_up = inode_current;
+					inode_current = cinode;
+					depth++;
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+				inode_type = ext4_inode_type(&mp->fs.sb,
+						child.inode);
+
+				/* Truncate */
+				if (inode_type != EXT4_INODE_MODE_DIRECTORY)
+					r = ext4_trunc_inode(mp, child.index, 0);
+				else
+					r = ext4_trunc_dir(mp, &act, &child);
+
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+
+				/* No children in child directory or file. Just
+				 * unlink. */
+				r = ext4_unlink(f.mp, &act, &child,
+						(char *)it.curr->name,
+						it.curr->name_len);
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+
+				ext4_inode_set_del_time(child.inode, -1L);
+				ext4_inode_set_links_cnt(child.inode, 0);
+				child.dirty = true;
+
+				r = ext4_fs_free_inode(&child);
+				if (r != 0) {
+					ext4_fs_put_inode_ref(&child);
+					goto End;
+				}
+
+				r = ext4_fs_put_inode_ref(&child);
+				if (r != 0)
+					goto End;
+
+			}
+
+			r = ext4_dir_iterator_next(&it);
+			if (r != 0)
+				goto End;
+
+			act_curr_pos = it.curr_off;
+End:
+			ext4_dir_iterator_fini(&it);
+			/* Keep the first error; only adopt the put result on success. */
+			if (r == 0)
+				r = ext4_fs_put_inode_ref(&act);
+			else
+				ext4_fs_put_inode_ref(&act);
+
+			if (r != 0)
+				ext4_trans_abort(mp);
+			else
+				ext4_trans_stop(mp);
+		}
+
+		if (dir_end) {
+			/* Directory iterator reached last entry */
+			depth--;
+			if (depth)
+				inode_current = inode_up;
+
+		}
+
+		if (r != 0)
+			break;
+
+	} while (depth);
+
+	/* Last unlink */
+	if (r == 0 && !depth) {
+		/* Load parent. */
+		struct ext4_inode_ref parent;
+		r = ext4_fs_get_inode_ref(&f.mp->fs, inode_up,
+				&parent);
+		if (r != 0)
+			goto Finish;
+		r = ext4_fs_get_inode_ref(&f.mp->fs, inode_current,
+				&act);
+		if (r != 0) {
+			/* `act` was not acquired; the reference held here is
+			 * `parent`, so that is the one to release. */
+			ext4_fs_put_inode_ref(&parent);
+			goto Finish;
+		}
+
+		ext4_trans_start(mp);
+
+		/* In this place all directories should be
+		 * unlinked.
+		 * Last unlink from root of current directory */
+		r = ext4_unlink(f.mp, &parent, &act,
+				(char *)path, len);
+		if (r != 0) {
+			ext4_fs_put_inode_ref(&parent);
+			ext4_fs_put_inode_ref(&act);
+			goto Finish;
+		}
+
+		if (ext4_inode_get_links_cnt(act.inode) == 2) {
+			ext4_inode_set_del_time(act.inode, -1L);
+			ext4_inode_set_links_cnt(act.inode, 0);
+			act.dirty = true;
+			/* Truncate */
+			r = ext4_trunc_dir(mp, &parent, &act);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&parent);
+				ext4_fs_put_inode_ref(&act);
+				goto Finish;
+			}
+
+			r = ext4_fs_free_inode(&act);
+			if (r != 0) {
+				ext4_fs_put_inode_ref(&parent);
+				ext4_fs_put_inode_ref(&act);
+				goto Finish;
+			}
+		}
+
+		r = ext4_fs_put_inode_ref(&parent);
+		if (r != 0)
+			goto Finish;
+
+		r = ext4_fs_put_inode_ref(&act);
+	Finish:
+		if (r != 0)
+			ext4_trans_abort(mp);
+		else
+			ext4_trans_stop(mp);
+	}
+
+	ext4_block_cache_write_back(mp->fs.bdev, 0);
+	EXT4_MP_UNLOCK(mp);
+
+	return r;
+}
+
+/* Rename a directory; this simply forwards to ext4_frename(). */
+int ext4_dir_mv(const char *path, const char *new_path)
+{
+	int rc;
+
+	rc = ext4_frename(path, new_path);
+	return rc;
+}
+
+/*
+ * Create a directory at `path`.
+ *
+ * A read-mode open is attempted first; if it succeeds the entry already
+ * exists and the call fails with errstr Eexists.  Otherwise the directory
+ * is created by opening it in "w" mode.
+ *
+ * Returns -1 if no mountpoint matches, the file system is read-only
+ * (Erdonlyfs) or the entry exists; otherwise the result of the creating
+ * open.
+ */
+int ext4_dir_mk(const char *path)
+{
+	struct ext4_mountpoint *mnt = ext4_get_mount(path);
+	ext4_file probe;
+	int rc;
+
+	if (mnt == nil)
+		return -1;
+
+	if (mnt->fs.read_only) {
+		werrstr(Erdonlyfs);
+		return -1;
+	}
+
+	EXT4_MP_LOCK(mnt);
+
+	rc = ext4_generic_open(&probe, path, "r", false, 0, 0);
+	if (rc != 0) {
+		/* Nothing there yet: create the new directory. */
+		rc = ext4_generic_open(&probe, path, "w", false, 0, 0);
+	} else {
+		werrstr(Eexists);
+		rc = -1;
+	}
+
+	EXT4_MP_UNLOCK(mnt);
+	return rc;
+}
+
+/*
+ * Open the directory `path` for iteration.
+ *
+ * The underlying file in `dir` is opened in "r" mode under the mountpoint
+ * lock and the iteration offset is reset to 0 (whether or not the open
+ * succeeded).  Returns -1 if no mountpoint matches `path`, otherwise the
+ * result of the open.
+ */
+int ext4_dir_open(ext4_dir *dir, const char *path)
+{
+	struct ext4_mountpoint *mnt;
+	int rc;
+
+	mnt = ext4_get_mount(path);
+	if (mnt == nil)
+		return -1;
+
+	EXT4_MP_LOCK(mnt);
+	rc = ext4_generic_open(&dir->f, path, "r", false, 0, 0);
+	dir->next_off = 0;
+	EXT4_MP_UNLOCK(mnt);
+
+	return rc;
+}
+
+/* Close a directory handle by closing the file embedded in it. */
+int ext4_dir_close(ext4_dir *dir)
+{
+	ext4_file *fp = &dir->f;
+
+	return ext4_fclose(fp);
+}
+
+/*
+ * Return the next entry of the open directory `dir`.
+ *
+ * The entry at dir->next_off is decoded into dir->de and a pointer to that
+ * storage is returned, so the result is overwritten by the next call.
+ * dir->next_off is then advanced, or set to a terminator value once the
+ * iterator has no further entry.
+ *
+ * Returns 0 when the end of the directory has been reached or when the
+ * inode or iterator cannot be loaded.
+ */
+const ext4_direntry *ext4_dir_entry_next(ext4_dir *dir)
+{
+#define EXT4_DIR_ENTRY_OFFSET_TERM (u64int)(-1)
+
+	int r;
+	u16int name_length;
+	ext4_direntry *de = 0;
+	struct ext4_inode_ref dir_inode;
+	struct ext4_dir_iter it;
+
+	EXT4_MP_LOCK(dir->f.mp);
+
+	if (dir->next_off == EXT4_DIR_ENTRY_OFFSET_TERM) {
+		EXT4_MP_UNLOCK(dir->f.mp);
+		return 0;
+	}
+
+	r = ext4_fs_get_inode_ref(&dir->f.mp->fs, dir->f.inode, &dir_inode);
+	if (r != 0) {
+		goto Finish;
+	}
+
+	r = ext4_dir_iterator_init(&it, &dir_inode, dir->next_off);
+	if (r != 0) {
+		ext4_fs_put_inode_ref(&dir_inode);
+		goto Finish;
+	}
+
+	/*
+	 * A successful init may still leave no current entry (nothing at
+	 * this offset).  Treat that as end of directory instead of
+	 * dereferencing a nil entry below.
+	 */
+	if (it.curr == nil) {
+		dir->next_off = EXT4_DIR_ENTRY_OFFSET_TERM;
+		ext4_dir_iterator_fini(&it);
+		ext4_fs_put_inode_ref(&dir_inode);
+		goto Finish;
+	}
+
+	memset(dir->de.name, 0, sizeof(dir->de.name));
+	name_length = ext4_dir_en_get_name_len(&dir->f.mp->fs.sb,
+			it.curr);
+	memcpy(dir->de.name, it.curr->name, name_length);
+
+	/* Directly copying the content isn't safe for Big-endian targets */
+	dir->de.inode = ext4_dir_en_get_inode(it.curr);
+	dir->de.entry_length = ext4_dir_en_get_entry_len(it.curr);
+	dir->de.name_length = name_length;
+	dir->de.inode_type = ext4_dir_en_get_inode_type(&dir->f.mp->fs.sb,
+			it.curr);
+
+	de = &dir->de;
+
+	ext4_dir_iterator_next(&it);
+
+	dir->next_off = it.curr ? it.curr_off : EXT4_DIR_ENTRY_OFFSET_TERM;
+
+	ext4_dir_iterator_fini(&it);
+	ext4_fs_put_inode_ref(&dir_inode);
+
+Finish:
+	EXT4_MP_UNLOCK(dir->f.mp);
+	return de;
+}
+
+/* Restart iteration of `dir` from its first entry (offset 0). */
+void ext4_dir_entry_rewind(ext4_dir *dir)
+{
+	u64int start = 0;
+
+	dir->next_off = start;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_balloc.c
@@ -1,0 +1,617 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_trans.h"
+#include "ext4_balloc.h"
+#include "ext4_super.h"
+#include "ext4_crc32.h"
+#include "ext4_block_group.h"
+#include "ext4_fs.h"
+#include "ext4_bitmap.h"
+#include "ext4_inode.h"
+
+/**@brief Compute the block group index that contains a block address.
+ * @param s     superblock pointer.
+ * @param baddr Absolute address of block.
+ * @return Block group index
+ */
+u32int ext4_balloc_get_bgid_of_block(struct ext4_sblock *s,
+				     u64int baddr)
+{
+	u64int rel = baddr;
+
+	/* When first_data_block is non-zero, non-zero addresses are shifted
+	 * down by one before dividing. */
+	if (rel != 0 && ext4_get32(s, first_data_block) != 0)
+		rel -= 1;
+
+	return (u32int)(rel / ext4_get32(s, blocks_per_group));
+}
+
+/**@brief Compute the starting block address of a block group
+ * @param s    superblock pointer.
+ * @param bgid block group index
+ * @return Block address
+ */
+u64int ext4_balloc_get_block_of_bgid(struct ext4_sblock *s,
+				     u32int bgid)
+{
+	u64int baddr = 0;
+	if (ext4_get32(s, first_data_block))
+		baddr++;
+
+	/* Widen before multiplying: a 32-bit product would wrap for groups
+	 * that start at or beyond block 2^32. */
+	baddr += (u64int)bgid * ext4_get32(s, blocks_per_group);
+	return baddr;
+}
+
+/*
+ * Compute the crc32c checksum of a block bitmap.
+ *
+ * Returns 0 when the metadata-checksum feature is off.  Otherwise the CRC
+ * is seeded with the file system UUID and then run over the first
+ * blocks_per_group / 8 bytes of `bitmap`.
+ */
+static u32int ext4_balloc_bitmap_csum(struct ext4_sblock *sb,
+				      void *bitmap)
+{
+	u32int nbytes;
+	u32int csum;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return 0;
+
+	nbytes = ext4_get32(sb, blocks_per_group) / 8;
+
+	/* First the fs uuid, then the bitmap itself. */
+	csum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	csum = ext4_crc32c(csum, bitmap, nbytes);
+	return csum;
+}
+
+/*
+ * Store the checksum of `bitmap` in the block group descriptor `bg`.
+ *
+ * Does nothing when the metadata-checksum feature is off.  The low 16 bits
+ * are always written; the high 16 bits only when the descriptor size equals
+ * EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ */
+void ext4_balloc_set_bitmap_csum(struct ext4_sblock *sb,
+				 struct ext4_bgroup *bg,
+				 void *bitmap)
+{
+	u32int csum;
+	u16int lo, hi;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return;
+
+	csum = ext4_balloc_bitmap_csum(sb, bitmap);
+	lo = to_le16(csum & 0xFFFF);
+	hi = to_le16(csum >> 16);
+
+	bg->block_bitmap_csum_lo = lo;
+	if (ext4_sb_get_desc_size(sb) == EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		bg->block_bitmap_csum_hi = hi;
+}
+
+/*
+ * Check the checksum stored in `bg` against the one computed for `bitmap`.
+ *
+ * Always true when the metadata-checksum feature is off.  The high half is
+ * compared only when the descriptor size equals
+ * EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE.
+ */
+static bool
+ext4_balloc_verify_bitmap_csum(struct ext4_sblock *sb,
+			       struct ext4_bgroup *bg,
+			       void *bitmap)
+{
+	u32int csum;
+	u16int lo, hi;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+
+	csum = ext4_balloc_bitmap_csum(sb, bitmap);
+	lo = to_le16(csum & 0xFFFF);
+	hi = to_le16(csum >> 16);
+
+	if (bg->block_bitmap_csum_lo != lo)
+		return false;
+
+	if (ext4_sb_get_desc_size(sb) != EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return true;
+
+	return bg->block_bitmap_csum_hi == hi;
+}
+
+/*
+ * Free the single block `baddr` owned by `inode_ref`.
+ *
+ * Clears the block's bit in its group bitmap, refreshes the bitmap
+ * checksum, then bumps the free-block counters of the superblock and the
+ * block group and lowers the inode's block count.  Finally the block is
+ * revoked in the transaction layer and dropped from the block cache.
+ *
+ * A bitmap checksum mismatch is only logged.  Returns 0 on success or the
+ * failing helper's result.
+ */
+int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr)
+{
+	struct ext4_fs *fs = inode_ref->fs;
+	struct ext4_sblock *sb = &fs->sb;
+	struct ext4_block_group_ref grp;
+	struct ext4_bgroup *desc;
+	struct ext4_block bmp;
+	ext4_fsblk_t bmp_addr;
+	u32int gid, bit, blksz, grp_free;
+	u64int total_free, nblk;
+	int rc;
+
+	gid = ext4_balloc_get_bgid_of_block(sb, baddr);
+	bit = ext4_fs_addr_to_idx_bg(sb, baddr);
+
+	/* Load block group reference */
+	rc = ext4_fs_get_block_group_ref(fs, gid, &grp);
+	if (rc != 0)
+		return rc;
+
+	desc = grp.block_group;
+
+	/* Load block with bitmap */
+	bmp_addr = ext4_bg_get_block_bitmap(desc, sb);
+	rc = ext4_trans_block_get(fs->bdev, &bmp, bmp_addr);
+	if (rc != 0) {
+		ext4_fs_put_block_group_ref(&grp);
+		return rc;
+	}
+
+	if (!ext4_balloc_verify_bitmap_csum(sb, desc, bmp.data)) {
+		ext4_dbg(DEBUG_BALLOC,
+			DBG_WARN "Bitmap checksum failed."
+			"Group: %ud\n",
+			grp.index);
+	}
+
+	/* Modify bitmap */
+	ext4_bmap_bit_clr(bmp.data, bit);
+	ext4_balloc_set_bitmap_csum(sb, desc, bmp.data);
+	ext4_trans_set_block_dirty(bmp.buf);
+
+	/* Release block with bitmap */
+	rc = ext4_block_set(fs->bdev, &bmp);
+	if (rc != 0) {
+		/* Error in saving bitmap */
+		ext4_fs_put_block_group_ref(&grp);
+		return rc;
+	}
+
+	blksz = ext4_sb_get_block_size(sb);
+
+	/* Superblock: one more free block */
+	total_free = ext4_sb_get_free_blocks_cnt(sb);
+	ext4_sb_set_free_blocks_cnt(sb, total_free + 1);
+
+	/* Inode: block count is kept in EXT4_INODE_BLOCK_SIZE units */
+	nblk = ext4_inode_get_blocks_count(sb, inode_ref->inode);
+	nblk -= blksz / EXT4_INODE_BLOCK_SIZE;
+	ext4_inode_set_blocks_count(sb, inode_ref->inode, nblk);
+	inode_ref->dirty = true;
+
+	/* Block group: one more free block */
+	grp_free = ext4_bg_get_free_blocks_count(desc, sb);
+	ext4_bg_set_free_blocks_count(desc, sb, grp_free + 1);
+	grp.dirty = true;
+
+	rc = ext4_trans_try_revoke_block(fs->bdev, baddr);
+	if (rc != 0) {
+		grp.dirty = false;
+		ext4_fs_put_block_group_ref(&grp);
+		return rc;
+	}
+	ext4_bcache_invalidate_lba(fs->bdev->bc, baddr, 1);
+
+	/* Release block group reference */
+	return ext4_fs_put_block_group_ref(&grp);
+}
+
+/*
+ * Free `count` consecutive blocks starting at `first`, owned by `inode_ref`.
+ *
+ * The range is processed one block group at a time: the bits are cleared in
+ * the group's bitmap, the bitmap checksum is refreshed and the free-block
+ * counters of the superblock and the group are raised while the inode's
+ * block count is lowered.  Afterwards every block of the range is revoked
+ * in the transaction layer and the range is dropped from the block cache.
+ *
+ * A bitmap checksum mismatch is only logged.  Returns 0 on success or the
+ * first failing helper's result; on failure the range may have been freed
+ * only partially.
+ */
+int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
+			    ext4_fsblk_t first, u32int count)
+{
+	int rc = 0;
+	u32int blk_cnt = count;
+	ext4_fsblk_t start_block = first;
+	struct ext4_fs *fs = inode_ref->fs;
+	struct ext4_sblock *sb = &fs->sb;
+
+	/* Group of the first block of the range */
+	u32int bg_first = ext4_balloc_get_bgid_of_block(sb, first);
+
+	/* Group of the last block of the range */
+	u32int bg_last = ext4_balloc_get_bgid_of_block(sb, first + count - 1);
+
+	if (!ext4_sb_feature_incom(sb, EXT4_FINCOM_FLEX_BG)) {
+		/* Without flex_bg a contiguous range is not expected to
+		 * end in a different block group than it starts in. */
+		if (bg_last != bg_first) {
+			ext4_dbg(DEBUG_BALLOC, DBG_WARN "FLEX_BG: disabled & "
+				"bg_last: %ud bg_first: %ud\n",
+				bg_last, bg_first);
+		}
+	}
+
+	/* Load block group reference */
+	struct ext4_block_group_ref bg_ref;
+	while (bg_first <= bg_last) {
+
+		rc = ext4_fs_get_block_group_ref(fs, bg_first, &bg_ref);
+		if (rc != 0)
+			return rc;
+
+		struct ext4_bgroup *bg = bg_ref.block_group;
+
+		u32int idx_in_bg_first;
+		idx_in_bg_first = ext4_fs_addr_to_idx_bg(sb, first);
+
+		/* Load block with bitmap */
+		ext4_fsblk_t bitmap_blk = ext4_bg_get_block_bitmap(bg, sb);
+
+		struct ext4_block blk;
+		rc = ext4_trans_block_get(fs->bdev, &blk, bitmap_blk);
+		if (rc != 0) {
+			ext4_fs_put_block_group_ref(&bg_ref);
+			return rc;
+		}
+
+		if (!ext4_balloc_verify_bitmap_csum(sb, bg, blk.data)) {
+			ext4_dbg(DEBUG_BALLOC,
+				DBG_WARN "Bitmap checksum failed."
+				"Group: %ud\n",
+				bg_ref.index);
+		}
+		/* Bits left in this bitmap block from the start index on */
+		u32int free_cnt;
+		free_cnt = ext4_sb_get_block_size(sb) * 8 - idx_in_bg_first;
+
+		/* If last block, free only count blocks */
+		free_cnt = count > free_cnt ? free_cnt : count;
+
+		/* Modify bitmap */
+		ext4_bmap_bits_free(blk.data, idx_in_bg_first, free_cnt);
+		ext4_balloc_set_bitmap_csum(sb, bg, blk.data);
+		ext4_trans_set_block_dirty(blk.buf);
+
+		count -= free_cnt;
+		first += free_cnt;
+
+		/* Release block with bitmap */
+		rc = ext4_block_set(fs->bdev, &blk);
+		if (rc != 0) {
+			ext4_fs_put_block_group_ref(&bg_ref);
+			return rc;
+		}
+
+		u32int block_size = ext4_sb_get_block_size(sb);
+
+		/* Update superblock free blocks count */
+		u64int sb_free_blocks = ext4_sb_get_free_blocks_cnt(sb);
+		sb_free_blocks += free_cnt;
+		ext4_sb_set_free_blocks_cnt(sb, sb_free_blocks);
+
+		/* Update inode blocks count */
+		u64int ino_blocks;
+		ino_blocks = ext4_inode_get_blocks_count(sb, inode_ref->inode);
+		ino_blocks -= free_cnt * (block_size / EXT4_INODE_BLOCK_SIZE);
+		ext4_inode_set_blocks_count(sb, inode_ref->inode, ino_blocks);
+		inode_ref->dirty = true;
+
+		/* Update block group free blocks count */
+		u32int free_blocks;
+		free_blocks = ext4_bg_get_free_blocks_count(bg, sb);
+		free_blocks += free_cnt;
+		ext4_bg_set_free_blocks_count(bg, sb, free_blocks);
+		bg_ref.dirty = true;
+
+		/* Release block group reference.  Bail out on failure:
+		 * falling through would let the revoke loop overwrite rc
+		 * and trip the count assertion below. */
+		rc = ext4_fs_put_block_group_ref(&bg_ref);
+		if (rc != 0)
+			return rc;
+
+		bg_first++;
+	}
+
+	u32int i;
+	for (i = 0;i < blk_cnt;i++) {
+		rc = ext4_trans_try_revoke_block(fs->bdev, start_block + i);
+		if (rc != 0)
+			return rc;
+
+	}
+
+	ext4_bcache_invalidate_lba(fs->bdev->bc, start_block, blk_cnt);
+	/* All blocks should be released */
+	assert(count == 0);
+
+	return rc;
+}
+
+/*
+ * Allocate one block for inode_ref, preferring the address `goal`.
+ *
+ * Search order:
+ *  1. the goal bit itself in the goal's block group;
+ *  2. the bits after it, up to the next multiple of 64 (bounded by the
+ *     number of blocks in the group);
+ *  3. the first clear bit from the goal index to the end of the group;
+ *  4. every block group in turn, starting with the one after the goal's
+ *     and wrapping around, skipping groups whose free count is 0.
+ *
+ * On success the chosen address is stored in *fblock, the free-block
+ * counters of the superblock and the group are lowered, the inode's block
+ * count is raised, and the result of releasing the group reference is
+ * returned.  If no group yields a free bit, errstr is set to
+ * "no free blocks" and -1 is returned.  Any other failure returns the
+ * failing helper's result.  A bitmap checksum mismatch is only logged.
+ */
+int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref,
+ ext4_fsblk_t goal,
+ ext4_fsblk_t *fblock)
+{
+ ext4_fsblk_t alloc;
+ ext4_fsblk_t bmp_blk_adr;
+ u32int rel_blk_idx = 0;
+ u64int free_blocks;
+ int r;
+ struct ext4_sblock *sb = &inode_ref->fs->sb;
+
+ /* Load block group number for goal and relative index */
+ u32int bg_id = ext4_balloc_get_bgid_of_block(sb, goal);
+ u32int idx_in_bg = ext4_fs_addr_to_idx_bg(sb, goal);
+
+ struct ext4_block b;
+ struct ext4_block_group_ref bg_ref;
+
+ /* Load block group reference */
+ r = ext4_fs_get_block_group_ref(inode_ref->fs, bg_id, &bg_ref);
+ if (r != 0)
+ return r;
+
+ struct ext4_bgroup *bg = bg_ref.block_group;
+
+ free_blocks = ext4_bg_get_free_blocks_count(bg_ref.block_group, sb);
+ if (free_blocks == 0) {
+ /* This group has no free blocks */
+ goto goal_failed;
+ }
+
+ /* Compute indexes */
+ ext4_fsblk_t first_in_bg;
+ first_in_bg = ext4_balloc_get_block_of_bgid(sb, bg_ref.index);
+
+ u32int first_in_bg_index;
+ first_in_bg_index = ext4_fs_addr_to_idx_bg(sb, first_in_bg);
+
+ /* Never start the search below the group's first block index. */
+ if (idx_in_bg < first_in_bg_index)
+ idx_in_bg = first_in_bg_index;
+
+ /* Load block with bitmap */
+ bmp_blk_adr = ext4_bg_get_block_bitmap(bg_ref.block_group, sb);
+
+ r = ext4_trans_block_get(inode_ref->fs->bdev, &b, bmp_blk_adr);
+ if (r != 0) {
+ ext4_fs_put_block_group_ref(&bg_ref);
+ return r;
+ }
+
+ if (!ext4_balloc_verify_bitmap_csum(sb, bg, b.data)) {
+ ext4_dbg(DEBUG_BALLOC,
+ DBG_WARN "Bitmap checksum failed."
+ "Group: %ud\n",
+ bg_ref.index);
+ }
+
+ /* Check if goal is free */
+ if (ext4_bmap_is_bit_clr(b.data, idx_in_bg)) {
+ ext4_bmap_bit_set(b.data, idx_in_bg);
+ ext4_balloc_set_bitmap_csum(sb, bg_ref.block_group,
+ b.data);
+ ext4_trans_set_block_dirty(b.buf);
+ r = ext4_block_set(inode_ref->fs->bdev, &b);
+ if (r != 0) {
+ ext4_fs_put_block_group_ref(&bg_ref);
+ return r;
+ }
+
+ alloc = ext4_fs_bg_idx_to_addr(sb, idx_in_bg, bg_id);
+ goto success;
+ }
+
+ u32int blk_in_bg = ext4_blocks_in_group_cnt(sb, bg_id);
+
+ /* Round the goal index up to a multiple of 64, capped at the group size. */
+ u32int end_idx = (idx_in_bg + 63) & ~63;
+ if (end_idx > blk_in_bg)
+ end_idx = blk_in_bg;
+
+ /* Try to find free block near to goal */
+ u32int tmp_idx;
+ for (tmp_idx = idx_in_bg + 1; tmp_idx < end_idx; ++tmp_idx) {
+ if (ext4_bmap_is_bit_clr(b.data, tmp_idx)) {
+ ext4_bmap_bit_set(b.data, tmp_idx);
+
+ ext4_balloc_set_bitmap_csum(sb, bg, b.data);
+ ext4_trans_set_block_dirty(b.buf);
+ r = ext4_block_set(inode_ref->fs->bdev, &b);
+ if (r != 0) {
+ ext4_fs_put_block_group_ref(&bg_ref);
+ return r;
+ }
+
+ alloc = ext4_fs_bg_idx_to_addr(sb, tmp_idx, bg_id);
+ goto success;
+ }
+ }
+
+ /* Find free bit in bitmap */
+ bool no_space;
+ r = ext4_bmap_bit_find_clr(b.data, idx_in_bg, blk_in_bg, &rel_blk_idx, &no_space);
+ if (r == 0) {
+ ext4_bmap_bit_set(b.data, rel_blk_idx);
+ ext4_balloc_set_bitmap_csum(sb, bg_ref.block_group, b.data);
+ ext4_trans_set_block_dirty(b.buf);
+ r = ext4_block_set(inode_ref->fs->bdev, &b);
+ if (r != 0) {
+ ext4_fs_put_block_group_ref(&bg_ref);
+ return r;
+ }
+
+ alloc = ext4_fs_bg_idx_to_addr(sb, rel_blk_idx, bg_id);
+ goto success;
+ }
+
+ /* No free block found yet */
+ r = ext4_block_set(inode_ref->fs->bdev, &b);
+ if (r != 0) {
+ ext4_fs_put_block_group_ref(&bg_ref);
+ return r;
+ }
+
+goal_failed:
+
+ r = ext4_fs_put_block_group_ref(&bg_ref);
+ if (r != 0)
+ return r;
+
+ /* Try other block groups */
+ u32int block_group_count = ext4_block_group_cnt(sb);
+ u32int bgid = (bg_id + 1) % block_group_count;
+ u32int count = block_group_count;
+
+ /* Visits block_group_count groups, so the goal's group comes up again last. */
+ while (count > 0) {
+ r = ext4_fs_get_block_group_ref(inode_ref->fs, bgid, &bg_ref);
+ if (r != 0)
+ return r;
+
+ struct ext4_bgroup *bg = bg_ref.block_group;
+ free_blocks = ext4_bg_get_free_blocks_count(bg, sb);
+ if (free_blocks == 0) {
+ /* This group has no free blocks */
+ goto next_group;
+ }
+
+ /* Load block with bitmap */
+ bmp_blk_adr = ext4_bg_get_block_bitmap(bg, sb);
+ r = ext4_trans_block_get(inode_ref->fs->bdev, &b, bmp_blk_adr);
+ if (r != 0) {
+ ext4_fs_put_block_group_ref(&bg_ref);
+ return r;
+ }
+
+ if (!ext4_balloc_verify_bitmap_csum(sb, bg, b.data)) {
+ ext4_dbg(DEBUG_BALLOC,
+ DBG_WARN "Bitmap checksum failed."
+ "Group: %ud\n",
+ bg_ref.index);
+ }
+
+ /* Compute indexes */
+ first_in_bg = ext4_balloc_get_block_of_bgid(sb, bgid);
+ idx_in_bg = ext4_fs_addr_to_idx_bg(sb, first_in_bg);
+ blk_in_bg = ext4_blocks_in_group_cnt(sb, bgid);
+ first_in_bg_index = ext4_fs_addr_to_idx_bg(sb, first_in_bg);
+
+ if (idx_in_bg < first_in_bg_index)
+ idx_in_bg = first_in_bg_index;
+
+ bool no_space;
+ r = ext4_bmap_bit_find_clr(b.data, idx_in_bg, blk_in_bg, &rel_blk_idx, &no_space);
+ if (r == 0) {
+ ext4_bmap_bit_set(b.data, rel_blk_idx);
+ ext4_balloc_set_bitmap_csum(sb, bg, b.data);
+ ext4_trans_set_block_dirty(b.buf);
+ r = ext4_block_set(inode_ref->fs->bdev, &b);
+ if (r != 0) {
+ ext4_fs_put_block_group_ref(&bg_ref);
+ return r;
+ }
+
+ alloc = ext4_fs_bg_idx_to_addr(sb, rel_blk_idx, bgid);
+ goto success;
+ }
+
+ r = ext4_block_set(inode_ref->fs->bdev, &b);
+ if (r != 0) {
+ ext4_fs_put_block_group_ref(&bg_ref);
+ return r;
+ }
+
+ next_group:
+ r = ext4_fs_put_block_group_ref(&bg_ref);
+ if (r != 0) {
+ return r;
+ }
+
+ /* Goto next group */
+ bgid = (bgid + 1) % block_group_count;
+ count--;
+ }
+
+ werrstr("no free blocks");
+ return -1;
+
+success:
+ /* Empty command - because of syntax */
+ ;
+
+ /* bg_ref still refers to the group the block was taken from. */
+ u32int block_size = ext4_sb_get_block_size(sb);
+
+ /* Update superblock free blocks count */
+ u64int sb_free_blocks = ext4_sb_get_free_blocks_cnt(sb);
+ sb_free_blocks--;
+ ext4_sb_set_free_blocks_cnt(sb, sb_free_blocks);
+
+ /* Update inode blocks (different block size!) count */
+ u64int ino_blocks = ext4_inode_get_blocks_count(sb, inode_ref->inode);
+ ino_blocks += block_size / EXT4_INODE_BLOCK_SIZE;
+ ext4_inode_set_blocks_count(sb, inode_ref->inode, ino_blocks);
+ inode_ref->dirty = true;
+
+ /* Update block group free blocks count */
+
+ u32int fb_cnt = ext4_bg_get_free_blocks_count(bg_ref.block_group, sb);
+ fb_cnt--;
+ ext4_bg_set_free_blocks_count(bg_ref.block_group, sb, fb_cnt);
+
+ bg_ref.dirty = true;
+ r = ext4_fs_put_block_group_ref(&bg_ref);
+
+ *fblock = alloc;
+ return r;
+}
+
+/*
+ * Try to allocate exactly the block `baddr` for `inode_ref`.
+ *
+ * *free is set to whether the block's bitmap bit was clear.  If it was, the
+ * bit is set, the bitmap checksum refreshed, the free-block counters of the
+ * superblock and the group lowered and the inode's block count raised.  If
+ * it was not, nothing is changed.
+ *
+ * A bitmap checksum mismatch is only logged.  Returns the failing helper's
+ * result, or the result of releasing the block group reference.
+ */
+int ext4_balloc_try_alloc_block(struct ext4_inode_ref *inode_ref,
+				ext4_fsblk_t baddr, bool *free)
+{
+	struct ext4_fs *fs = inode_ref->fs;
+	struct ext4_sblock *sb = &fs->sb;
+	struct ext4_block_group_ref grp;
+	struct ext4_block bmp;
+	ext4_fsblk_t bmp_addr;
+	u32int gid, bit, blksz, grp_free;
+	u64int total_free, nblk;
+	int rc;
+
+	/* Compute indexes */
+	gid = ext4_balloc_get_bgid_of_block(sb, baddr);
+	bit = ext4_fs_addr_to_idx_bg(sb, baddr);
+
+	/* Load block group reference */
+	rc = ext4_fs_get_block_group_ref(fs, gid, &grp);
+	if (rc != 0)
+		return rc;
+
+	/* Load block with bitmap */
+	bmp_addr = ext4_bg_get_block_bitmap(grp.block_group, sb);
+	rc = ext4_trans_block_get(fs->bdev, &bmp, bmp_addr);
+	if (rc != 0) {
+		ext4_fs_put_block_group_ref(&grp);
+		return rc;
+	}
+
+	if (!ext4_balloc_verify_bitmap_csum(sb, grp.block_group, bmp.data)) {
+		ext4_dbg(DEBUG_BALLOC,
+			DBG_WARN "Bitmap checksum failed."
+			"Group: %ud\n",
+			grp.index);
+	}
+
+	/* Claim the bit if it is still clear */
+	*free = ext4_bmap_is_bit_clr(bmp.data, bit);
+	if (*free) {
+		ext4_bmap_bit_set(bmp.data, bit);
+		ext4_balloc_set_bitmap_csum(sb, grp.block_group, bmp.data);
+		ext4_trans_set_block_dirty(bmp.buf);
+	}
+
+	/* Release block with bitmap */
+	rc = ext4_block_set(fs->bdev, &bmp);
+	if (rc != 0) {
+		/* Error in saving bitmap */
+		ext4_fs_put_block_group_ref(&grp);
+		return rc;
+	}
+
+	if (*free) {
+		blksz = ext4_sb_get_block_size(sb);
+
+		/* Superblock: one free block fewer */
+		total_free = ext4_sb_get_free_blocks_cnt(sb);
+		ext4_sb_set_free_blocks_cnt(sb, total_free - 1);
+
+		/* Inode: block count is kept in EXT4_INODE_BLOCK_SIZE units */
+		nblk = ext4_inode_get_blocks_count(sb, inode_ref->inode);
+		nblk += blksz / EXT4_INODE_BLOCK_SIZE;
+		ext4_inode_set_blocks_count(sb, inode_ref->inode, nblk);
+		inode_ref->dirty = true;
+
+		/* Block group: one free block fewer */
+		grp_free = ext4_bg_get_free_blocks_count(grp.block_group, sb);
+		ext4_bg_set_free_blocks_count(grp.block_group, sb, grp_free - 1);
+
+		grp.dirty = true;
+	}
+
+	return ext4_fs_put_block_group_ref(&grp);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_bcache.c
@@ -1,0 +1,286 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_bcache.h"
+#include "ext4_blockdev.h"
+#include "ext4_debug.h"
+
+/* Order two buffers by logical block address: 1, -1 or 0. */
+static int ext4_bcache_lba_compare(struct ext4_buf *a, struct ext4_buf *b)
+{
+	if (a->lba == b->lba)
+		return 0;
+
+	return a->lba > b->lba ? 1 : -1;
+}
+
+/* Order two buffers by LRU id: 1, -1 or 0. */
+static int ext4_bcache_lru_compare(struct ext4_buf *a, struct ext4_buf *b)
+{
+	if (a->lru_id == b->lru_id)
+		return 0;
+
+	return a->lru_id > b->lru_id ? 1 : -1;
+}
+
+/*
+ * Instantiate the tree routines for the two buffer indexes used below:
+ * ext4_buf_lba links buffers through lba_node and orders them with
+ * ext4_bcache_lba_compare; ext4_buf_lru links them through lru_node and
+ * orders them with ext4_bcache_lru_compare.
+ */
+RB_GENERATE_INTERNAL(ext4_buf_lba, ext4_buf, lba_node,
+ ext4_bcache_lba_compare, static inline)
+RB_GENERATE_INTERNAL(ext4_buf_lru, ext4_buf, lru_node,
+ ext4_bcache_lru_compare, static inline)
+
+/*
+ * Initialise the block cache `bc` for `cnt` buffers of `itemsize` bytes.
+ *
+ * The structure is zeroed first; no buffers are allocated here.  All three
+ * arguments must be non-zero (asserted).  Always returns 0.
+ */
+int ext4_bcache_init_dynamic(struct ext4_bcache *bc, u32int cnt,
+			     u32int itemsize)
+{
+	assert(bc && cnt && itemsize);
+
+	memset(bc, 0, sizeof(*bc));
+
+	bc->itemsize = itemsize;
+	bc->cnt = cnt;
+	bc->max_ref_blocks = 0;
+	bc->ref_blocks = 0;
+
+	return 0;
+}
+
+/* Flush and then drop every buffer held in the LBA tree of `bc`. */
+void ext4_bcache_cleanup(struct ext4_bcache *bc)
+{
+	struct ext4_buf *cur;
+	struct ext4_buf *next;
+
+	/* The _SAFE walk is used because each buffer is dropped while iterating. */
+	RB_FOREACH_SAFE(cur, ext4_buf_lba, &bc->lba_root, next) {
+		ext4_block_flush_buf(bc->bdev, cur);
+		ext4_bcache_drop_buf(bc, cur);
+	}
+}
+
+/*
+ * Reset the cache descriptor `bc` to all zeroes.  Buffers are not released
+ * here.  Always returns 0.
+ */
+int ext4_bcache_fini_dynamic(struct ext4_bcache *bc)
+{
+	memset(bc, 0, sizeof(*bc));
+
+	return 0;
+}
+
+/**@brief:
+ *
+ * This is ext4_bcache, the module handling basic buffer-cache stuff.
+ *
+ * Buffers in a bcache are sorted by their LBA and stored in a
+ * RB-Tree(lba_root).
+ *
+ * Bcache also maintains another RB-Tree(lru_root) right now, where
+ * buffers are sorted by their LRU id.
+ *
+ * A singly-linked list is used to track those dirty buffers which are
+ * ready to be flushed. (Those buffers which are dirty but also referenced
+ * are not considered ready to be flushed.)
+ *
+ * When a buffer is not referenced, it will be stored in both lba_root
+ * and lru_root, while it will only be stored in lba_root when it is
+ * referenced.
+ */
+
+/*
+ * Allocate a buffer descriptor for block `lba` together with a data area of
+ * bc->itemsize bytes.  Returns nil if either allocation fails.
+ */
+static struct ext4_buf *
+ext4_buf_alloc(struct ext4_bcache *bc, u64int lba)
+{
+	struct ext4_buf *nb;
+	void *payload;
+
+	payload = ext4_malloc(bc->itemsize);
+	if (payload == nil)
+		return nil;
+
+	nb = ext4_calloc(1, sizeof(*nb));
+	if (nb == nil) {
+		ext4_free(payload);
+		return nil;
+	}
+
+	nb->bc = bc;
+	nb->data = payload;
+	nb->lba = lba;
+	return nb;
+}
+
+/* Release a buffer's data area and then the descriptor itself. */
+static void ext4_buf_free(struct ext4_buf *buf)
+{
+	void *payload = buf->data;
+
+	ext4_free(payload);
+	ext4_free(buf);
+}
+
+/*
+ * Find the buffer caching exactly block `lba` in the LBA tree, or nil if
+ * none is cached.
+ */
+static struct ext4_buf *
+ext4_buf_lookup(struct ext4_bcache *bc, u64int lba)
+{
+	struct ext4_buf *hit;
+	/* Only the key field matters for the comparison. */
+	struct ext4_buf key = {
+		.lba = lba
+	};
+
+	hit = RB_FIND(ext4_buf_lba, &bc->lba_root, &key);
+	return hit;
+}
+
+/* Return the buffer with the smallest LRU id in the LRU tree (RB_MIN). */
+struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc)
+{
+	struct ext4_buf *oldest;
+
+	oldest = RB_MIN(ext4_buf_lru, &bc->lru_root);
+	return oldest;
+}
+
+/*
+ * Remove `buf` from the cache and free it.
+ *
+ * An unreferenced buffer is also taken out of the LRU tree; a buffer that
+ * is still referenced only triggers a warning and is dropped regardless.
+ * A dirty buffer is taken off the dirty list without being written.
+ */
+void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf)
+{
+	if (buf->refctr == 0) {
+		RB_REMOVE(ext4_buf_lru, &bc->lru_root, buf);
+	} else {
+		/* Warn on dropping any referenced buffers. */
+		ext4_dbg(DEBUG_BCACHE, DBG_WARN "Buffer is still referenced. "
+			"lba: %llud, refctr: %ud\n",
+			buf->lba, buf->refctr);
+	}
+
+	RB_REMOVE(ext4_buf_lba, &bc->lba_root, buf);
+
+	/* Forcibly drop dirty buffer. */
+	if (ext4_bcache_test_flag(buf, BC_DIRTY))
+		ext4_bcache_remove_dirty_node(bc, buf);
+
+	ext4_buf_free(buf);
+	bc->ref_blocks -= 1;
+}
+
+/*
+ * Invalidate `buf`: forget its end-of-write callback, take it off the dirty
+ * list if it is on it, and clear its dirty state via
+ * ext4_bcache_clear_dirty().
+ */
+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
+				struct ext4_buf *buf)
+{
+	bool was_dirty;
+
+	buf->end_write = nil;
+	buf->end_write_arg = nil;
+
+	was_dirty = ext4_bcache_test_flag(buf, BC_DIRTY);
+	if (was_dirty)
+		ext4_bcache_remove_dirty_node(bc, buf);
+
+	ext4_bcache_clear_dirty(buf);
+}
+
+/*
+ * Invalidate every cached buffer whose block address lies in
+ * [from, from + cnt).
+ *
+ * Each address is looked up individually.  Starting a tree walk from an
+ * exact-match lookup of `from` would skip the whole range whenever `from`
+ * itself is not cached, even if later blocks of the range are.
+ */
+void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,
+				u64int from,
+				u32int cnt)
+{
+	struct ext4_buf *buf;
+	u64int lba;
+	u32int i;
+
+	for (i = 0, lba = from; i < cnt; i++, lba++) {
+		buf = ext4_buf_lookup(bc, lba);
+		if (buf != nil)
+			ext4_bcache_invalidate_buf(bc, buf);
+	}
+}
+
+/*
+ * Look up block `lba` in the cache and take a reference on it.
+ *
+ * On a hit, a previously unreferenced buffer gets a fresh LRU id and is
+ * removed from the LRU tree (and from the dirty list if it is dirty); the
+ * reference count is raised and `b` is filled in.  Returns the buffer, or
+ * nil on a miss, in which case `b` is left untouched.
+ */
+struct ext4_buf *
+ext4_bcache_find_get(struct ext4_bcache *bc, struct ext4_block *b,
+		     u64int lba)
+{
+	struct ext4_buf *hit;
+
+	hit = ext4_buf_lookup(bc, lba);
+	if (hit == nil)
+		return nil;
+
+	if (hit->refctr == 0) {
+		/* First reference: new LRU id, leave the LRU tree. */
+		hit->lru_id = ++bc->lru_ctr;
+		RB_REMOVE(ext4_buf_lru, &bc->lru_root, hit);
+		if (ext4_bcache_test_flag(hit, BC_DIRTY))
+			ext4_bcache_remove_dirty_node(bc, hit);
+	}
+
+	ext4_bcache_inc_ref(hit);
+
+	b->lb_id = lba;
+	b->buf = hit;
+	b->data = hit->data;
+
+	return hit;
+}
+
+/*
+ * Get a referenced buffer for block b->lb_id, creating it if necessary.
+ *
+ * If the block is already cached, *is_new is false.  Otherwise a new buffer
+ * is allocated, inserted into the LBA tree, referenced and given a fresh
+ * LRU id, and *is_new is true.
+ *
+ * Returns 0 on success, -1 with errstr "memory" if allocation fails.
+ */
+int ext4_bcache_alloc(struct ext4_bcache *bc, struct ext4_block *b,
+		      bool *is_new)
+{
+	struct ext4_buf *nb;
+
+	/* Try to find the buffer with the exact LBA. */
+	if (ext4_bcache_find_get(bc, b, b->lb_id) != nil) {
+		*is_new = false;
+		return 0;
+	}
+
+	/* We need to allocate one buffer. */
+	nb = ext4_buf_alloc(bc, b->lb_id);
+	if (nb == nil) {
+		werrstr("memory");
+		return -1;
+	}
+
+	RB_INSERT(ext4_buf_lba, &bc->lba_root, nb);
+
+	/* One more buffer in the cache; track the high-water mark. */
+	bc->ref_blocks += 1;
+	if (bc->ref_blocks > bc->max_ref_blocks)
+		bc->max_ref_blocks = bc->ref_blocks;
+
+	ext4_bcache_inc_ref(nb);
+	nb->lru_id = ++bc->lru_ctr;
+
+	b->buf = nb;
+	b->data = nb->data;
+
+	*is_new = true;
+	return 0;
+}
+
+/*
+ * Drop one reference on the buffer behind `b`.
+ *
+ * When the last reference goes away the buffer is put back into the LRU
+ * tree.  If it is dirty and up to date it is either queued on the dirty
+ * list (write-back mode, and neither BC_FLUSH nor BC_TMP set) or flushed at
+ * once.  A buffer that is not up to date, or is marked BC_TMP, is then
+ * dropped from the cache.
+ *
+ * Returns 0 on success, -1 with errstr "invalid block id" if b->lb_id is 0.
+ */
+int ext4_bcache_free(struct ext4_bcache *bc, struct ext4_block *b)
+{
+	struct ext4_buf *buf;
+
+	/* Validate the pointers before touching *b. */
+	assert(bc && b);
+
+	buf = b->buf;
+
+	/* Check if valid. */
+	if (!b->lb_id) {
+		werrstr("invalid block id");
+		return -1;
+	}
+
+	/* Block should have a valid pointer to ext4_buf. */
+	assert(buf);
+
+	/* Nobody should free a block cache entry that is not referenced. */
+	assert(buf->refctr);
+
+	/* Just decrease reference counter */
+	ext4_bcache_dec_ref(buf);
+
+	/* We are the last one touching this buffer, do the cleanups. */
+	if (!buf->refctr) {
+		RB_INSERT(ext4_buf_lru, &bc->lru_root, buf);
+		/* This buffer is ready to be flushed. */
+		if (ext4_bcache_test_flag(buf, BC_DIRTY) &&
+		    ext4_bcache_test_flag(buf, BC_UPTODATE)) {
+			if (bc->bdev->cache_write_back &&
+			    !ext4_bcache_test_flag(buf, BC_FLUSH) &&
+			    !ext4_bcache_test_flag(buf, BC_TMP))
+				ext4_bcache_insert_dirty_node(bc, buf);
+			else {
+				ext4_block_flush_buf(bc->bdev, buf);
+				ext4_bcache_clear_flag(buf, BC_FLUSH);
+			}
+		}
+
+		/* The buffer is invalidated...drop it. */
+		if (!ext4_bcache_test_flag(buf, BC_UPTODATE) ||
+		    ext4_bcache_test_flag(buf, BC_TMP))
+			ext4_bcache_drop_buf(bc, buf);
+
+	}
+
+	b->lb_id = 0;
+	b->data = 0;
+
+	return 0;
+}
+
+/* True once the number of buffers in the cache has reached bc->cnt. */
+bool ext4_bcache_is_full(struct ext4_bcache *bc)
+{
+	if (bc->ref_blocks >= bc->cnt)
+		return true;
+
+	return false;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_bitmap.c
@@ -1,0 +1,84 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_bitmap.h"
+
+/*
+ * Clear bcnt consecutive bits of bmap, starting at bit index sbit.
+ * Leading bits up to a byte boundary and the trailing partial byte are
+ * cleared one bit at a time; the whole bytes in between are zeroed at once.
+ */
+void ext4_bmap_bits_free(u8int *bmap, u32int sbit, u32int bcnt)
+{
+	u32int i;
+
+	/* Head: single bits until the index is a multiple of 8. */
+	for (i = sbit; i & 7; i++, bcnt--) {
+		if (!bcnt)
+			return;
+		ext4_bmap_bit_clr(bmap, i);
+	}
+	bmap += i >> 3;
+
+	/* Body: whole bytes.  Only the bcnt & 7 leftover bits may be cleared
+	 * afterwards; clearing bcnt bits again would wipe bits past the range. */
+	memset(bmap, 0, bcnt >> 3);
+	bmap += bcnt >> 3;
+	for (i = 0; i < (bcnt & 7); i++)
+		ext4_bmap_bit_clr(bmap, i);
+}
+
+/*
+ * Find the first clear bit of bmap in the half-open range [sbit, ebit).
+ * On success stores its index in *bit_id and returns 0; *no_space is false.
+ * When every bit in the range is set, sets *no_space and returns -1.
+ */
+int ext4_bmap_bit_find_clr(u8int *bmap, u32int sbit, u32int ebit,
+			   u32int *bit_id, bool *no_space)
+{
+	u32int i;
+	u32int bcnt = ebit - sbit;
+
+	*no_space = false;
+	/* Head: probe single bits until the index is byte aligned. */
+	for (i = sbit; i & 7; i++, bcnt--) {
+		if (!bcnt)
+			goto Nospace;
+		if (ext4_bmap_is_bit_clr(bmap, i)) {
+			/* Report the bit that was tested, not the range start. */
+			*bit_id = i;
+			return 0;
+		}
+	}
+
+	/* From here on sbit is the index of bit 0 of the byte at bmap. */
+	sbit = i;
+	bmap += sbit >> 3;
+	/* Body: skip bytes that are completely set. */
+	while (bcnt >= 8) {
+		if (*bmap != 0xFF) {
+			for (i = 0; i < 8; ++i) {
+				if (ext4_bmap_is_bit_clr(bmap, i)) {
+					*bit_id = sbit + i;
+					return 0;
+				}
+			}
+		}
+		bmap += 1;
+		bcnt -= 8;
+		sbit += 8;
+	}
+
+	/* Tail: the remaining bcnt (< 8) bits of the last byte. */
+	for (i = 0; i < bcnt; ++i) {
+		if (ext4_bmap_is_bit_clr(bmap, i)) {
+			*bit_id = sbit + i;
+			return 0;
+		}
+	}
+
+Nospace:
+	*no_space = true;
+	return -1;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_block_group.c
@@ -1,0 +1,47 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_block_group.h"
+
+/**@brief CRC-16 lookup table: 256 entries, indexed by a byte value.*/
+static u16int const crc16_tab[256] = {
+ 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, 0xC601,
+ 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, 0xCC01, 0x0CC0,
+ 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, 0x0A00, 0xCAC1, 0xCB81,
+ 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, 0xD801, 0x18C0, 0x1980, 0xD941,
+ 0x1B00, 0xDBC1, 0xDA81, 0x1A40, 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01,
+ 0x1DC0, 0x1C80, 0xDC41, 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0,
+ 0x1680, 0xD641, 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081,
+ 0x1040, 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
+ 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, 0x3C00,
+ 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, 0xFA01, 0x3AC0,
+ 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, 0x2800, 0xE8C1, 0xE981,
+ 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, 0xEE01, 0x2EC0, 0x2F80, 0xEF41,
+ 0x2D00, 0xEDC1, 0xEC81, 0x2C40, 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700,
+ 0xE7C1, 0xE681, 0x2640, 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0,
+ 0x2080, 0xE041, 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281,
+ 0x6240, 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
+ 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, 0xAA01,
+ 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, 0x7800, 0xB8C1,
+ 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, 0xBE01, 0x7EC0, 0x7F80,
+ 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, 0xB401, 0x74C0, 0x7580, 0xB541,
+ 0x7700, 0xB7C1, 0xB681, 0x7640, 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101,
+ 0x71C0, 0x7080, 0xB041, 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0,
+ 0x5280, 0x9241, 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481,
+ 0x5440, 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
+ 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, 0x8801,
+ 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, 0x4E00, 0x8EC1,
+ 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, 0x4400, 0x84C1, 0x8581,
+ 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, 0x8201, 0x42C0, 0x4380, 0x8341,
+ 0x4100, 0x81C1, 0x8081, 0x4040};
+
+/* Fold len bytes of buffer into the running CRC-16 value crc; returns the new value. */
+u16int ext4_bg_crc16(u16int crc, const u8int *buffer, usize len)
+{
+	usize i;
+
+	for (i = 0; i < len; i++)	/* one table lookup per input byte */
+		crc = (((crc >> 8) & 0xffU) ^ crc16_tab[(crc ^ buffer[i]) & 0xffU]) & 0x0000ffffU;
+	return crc;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_blockdev.c
@@ -1,0 +1,443 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_blockdev.h"
+#include "ext4_fs.h"
+#include "ext4_journal.h"
+
+static char Eoorop[] = "out of range operation";	/* error string for byte ranges that do not fit in part_size */
+
+/* Take the device lock via the optional bdif->lock hook, which must return 0. */
+static void ext4_bdif_lock(struct ext4_blockdev *bdev)
+{
+	int rc = 0;
+	if (bdev->bdif->lock)
+		rc = bdev->bdif->lock(bdev);
+	assert(rc == 0);
+}
+
+/* Release the device lock via the optional bdif->unlock hook, which must return 0. */
+static void ext4_bdif_unlock(struct ext4_blockdev *bdev)
+{
+	int rc = 0;
+	if (bdev->bdif->unlock)
+		rc = bdev->bdif->unlock(bdev);
+	assert(rc == 0);
+}
+
+static int ext4_bdif_bread(struct ext4_blockdev *bdev, void *buf,
+			   u64int blk_id, u32int blk_cnt)
+{	/* Pass the read to the driver's bread hook with the device lock held. */
+	ext4_bdif_lock(bdev);
+	int status = bdev->bdif->bread(bdev, buf, blk_id, blk_cnt);
+	bdev->bdif->bread_ctr += 1;	/* counted whatever the outcome */
+	ext4_bdif_unlock(bdev);
+	return status;
+}
+
+static int ext4_bdif_bwrite(struct ext4_blockdev *bdev, const void *buf,
+			    u64int blk_id, u32int blk_cnt)
+{	/* Pass the write to the driver's bwrite hook with the device lock held. */
+	ext4_bdif_lock(bdev);
+	int status = bdev->bdif->bwrite(bdev, buf, blk_id, blk_cnt);
+	bdev->bdif->bwrite_ctr += 1;	/* counted whatever the outcome */
+	ext4_bdif_unlock(bdev);
+	return status;
+}
+
+/*
+ * Open the physical device behind bdev, or just take another reference if
+ * it is already open.  Returns 0, or the non-zero result of the open hook.
+ */
+int ext4_block_init(struct ext4_blockdev *bdev)
+{
+	int rc;
+	assert(bdev);
+	assert(bdev->bdif);
+	assert(bdev->bdif->open && bdev->bdif->close &&
+	       bdev->bdif->bread && bdev->bdif->bwrite);
+	if (bdev->bdif->ph_refctr != 0) {
+		/* Already open: one more user. */
+		bdev->bdif->ph_refctr++;
+	} else {
+		/* First user: low level block init. */
+		rc = bdev->bdif->open(bdev);
+		if (rc != 0)
+			return rc;
+		bdev->bdif->ph_refctr = 1;
+	}
+	return 0;
+}
+
+int ext4_block_bind_bcache(struct ext4_blockdev *bdev, struct ext4_bcache *bc)
+{
+	assert(bc && bdev);
+	bc->bdev = bdev;	/* cache -> device pointer */
+	bdev->bc = bc;		/* device -> cache pointer */
+	return 0;		/* always succeeds */
+}
+
+/* Set the logical block size and derive the logical block count from part_size. */
+void ext4_block_set_lb_size(struct ext4_blockdev *bdev, u32int lb_bsize)
+{
+	/* The logical block size has to be a multiple of the physical one. */
+	assert(lb_bsize % bdev->bdif->ph_bsize == 0);
+	bdev->lg_bcnt = bdev->part_size / lb_bsize;
+	bdev->lg_bsize = lb_bsize;
+}
+
+/*
+ * Drop one reference to the physical device; the last reference closes it.
+ * Returns 0 if the device was not open or is still referenced, otherwise
+ * the result of the close hook.
+ */
+int ext4_block_fini(struct ext4_blockdev *bdev)
+{
+	assert(bdev);
+
+	/* Nothing to do when not open, or when other users remain. */
+	if (bdev->bdif->ph_refctr == 0 || --bdev->bdif->ph_refctr != 0)
+		return 0;
+	return bdev->bdif->close(bdev);
+}
+
+/*
+ * Write one cached buffer back to the device if it is both dirty and up to
+ * date; anything else is a no-op.  A successful write takes the buffer off
+ * the dirty list and clears BC_DIRTY.  The buffer's end_write callback, if
+ * any, is told the write status either way, with dont_shake set for the
+ * duration of the call.  Returns 0 or the write error.
+ */
+int ext4_block_flush_buf(struct ext4_blockdev *bdev, struct ext4_buf *buf)
+{
+	struct ext4_bcache *cache = bdev->bc;
+	int rc;
+
+	if (!ext4_bcache_test_flag(buf, BC_DIRTY) ||
+	    !ext4_bcache_test_flag(buf, BC_UPTODATE))
+		return 0;
+
+	rc = ext4_blocks_set_direct(bdev, buf->data, buf->lba, 1);
+	if (rc == 0) {
+		ext4_bcache_remove_dirty_node(cache, buf);
+		ext4_bcache_clear_flag(buf, BC_DIRTY);
+	}
+	if (buf->end_write) {
+		cache->dont_shake = true;
+		buf->end_write(cache, buf, rc, buf->end_write_arg);
+		cache->dont_shake = false;
+	}
+	return rc;
+}
+
+/* Flush the cached buffer for logical block lba; returns 0 if none is cached. */
+int ext4_block_flush_lba(struct ext4_blockdev *bdev, u64int lba)
+{
+	struct ext4_block blk;
+	struct ext4_buf *hit = ext4_bcache_find_get(bdev->bc, &blk, lba);
+	int rc;
+	if (!hit)
+		return 0;
+	rc = ext4_block_flush_buf(bdev, hit);
+	ext4_bcache_free(bdev->bc, &blk);
+	return rc;
+}
+
+/* While the cache is full, evict the buffer ext4_buf_lowest_lru picks,
+ * flushing it first if dirty; a failed flush ends the run and is returned.
+ * Returns 0 at once when dont_shake is set, and sets it while running. */
+int ext4_block_cache_shake(struct ext4_blockdev *bdev)
+{
+	struct ext4_bcache *cache = bdev->bc;
+	struct ext4_buf *victim;
+	int rc = 0;
+
+	if (cache->dont_shake)
+		return 0;
+	cache->dont_shake = true;
+
+	while (!RB_EMPTY(&cache->lru_root) && ext4_bcache_is_full(cache)) {
+		victim = ext4_buf_lowest_lru(cache);
+		assert(victim);
+		if (ext4_bcache_test_flag(victim, BC_DIRTY)) {
+			rc = ext4_block_flush_buf(bdev, victim);
+			if (rc != 0)
+				break;
+		}
+		ext4_bcache_drop_buf(cache, victim);
+	}
+	cache->dont_shake = false;
+	return rc;
+}
+
+/*
+ * Attach b to the cache buffer for logical block lba without reading from
+ * the device.  Fails with -1 and Eio if the device is not open or lba is
+ * not below lg_bcnt; otherwise passes on errors from cache shaking or
+ * buffer allocation, and fails with "memory" if b ends up without data.
+ */
+int ext4_block_get_noread(struct ext4_blockdev *bdev, struct ext4_block *b,
+			  u64int lba)
+{
+	bool fresh;
+	int rc;
+
+	assert(bdev && b);
+	if (bdev->bdif->ph_refctr == 0 || lba >= bdev->lg_bcnt) {
+		werrstr(Eio);
+		return -1;
+	}
+	b->lb_id = lba;
+
+	/* Make room first: a full cache has to (flush and) drop a buffer. */
+	rc = ext4_block_cache_shake(bdev);
+	if (rc == 0)
+		rc = ext4_bcache_alloc(bdev->bc, b, &fresh);
+	if (rc != 0)
+		return rc;
+	if (!b->data) {
+		werrstr("memory");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Get logical block lba through the cache, reading it from the device
+ * unless the cached copy is already up to date.  On a read error the
+ * buffer reference is released again and the error is returned.
+ */
+int ext4_block_get(struct ext4_blockdev *bdev, struct ext4_block *b,
+		   u64int lba)
+{
+	int rc = ext4_block_get_noread(bdev, b, lba);
+
+	if (rc != 0)
+		return rc;
+	/* Only a buffer that is not up to date needs the physical read. */
+	if (!ext4_bcache_test_flag(b->buf, BC_UPTODATE)) {
+		rc = ext4_blocks_get_direct(bdev, b->data, lba, 1);
+		if (rc != 0) {
+			ext4_bcache_free(bdev->bc, b);
+			b->lb_id = 0;
+			return rc;
+		}
+		/* Fresh data was just read, so the buffer is current now. */
+		ext4_bcache_set_flag(b->buf, BC_UPTODATE);
+	}
+	return 0;
+}
+
+/* Give block b back to the cache.  Returns -1 with Eio if the device is not
+ * open, otherwise the result of ext4_bcache_free. */
+int ext4_block_set(struct ext4_blockdev *bdev, struct ext4_block *b)
+{
+	assert(bdev && b);
+	assert(b->buf);
+
+	if (bdev->bdif->ph_refctr != 0)
+		return ext4_bcache_free(bdev->bc, b);
+	werrstr(Eio);
+	return -1;
+}
+
+/* Read cnt logical blocks starting at lba directly from the device into buf
+ * (no cache involved); the logical address is converted to physical blocks. */
+int ext4_blocks_get_direct(struct ext4_blockdev *bdev, void *buf, u64int lba,
+			   u32int cnt)
+{
+	u32int per_lb;	/* physical blocks per logical block */
+	u64int first;	/* first physical block, partition offset included */
+
+	assert(bdev && buf);
+	per_lb = bdev->lg_bsize / bdev->bdif->ph_bsize;
+	first = (lba * bdev->lg_bsize + bdev->part_offset) / bdev->bdif->ph_bsize;
+	return ext4_bdif_bread(bdev, buf, first, per_lb * cnt);
+}
+
+/* Write cnt logical blocks starting at lba directly from buf to the device
+ * (no cache involved); the logical address is converted to physical blocks. */
+int ext4_blocks_set_direct(struct ext4_blockdev *bdev, const void *buf,
+			   u64int lba, u32int cnt)
+{
+	u32int per_lb;	/* physical blocks per logical block */
+	u64int first;	/* first physical block, partition offset included */
+
+	assert(bdev && buf);
+	per_lb = bdev->lg_bsize / bdev->bdif->ph_bsize;
+	first = (lba * bdev->lg_bsize + bdev->part_offset) / bdev->bdif->ph_bsize;
+	return ext4_bdif_bwrite(bdev, buf, first, per_lb * cnt);
+}
+
+/*
+ * Write len bytes from buf to the partition at byte offset off, bypassing
+ * the block cache.  A leading and a trailing partial physical block are
+ * merged into the existing contents through the interface's block buffer
+ * ph_bbuf (read, patch, write back); whole blocks in between are written
+ * straight from buf.
+ * Returns 0 on success, -1 with Eio if the device is not open, -1 with
+ * Eoorop if the range does not fit in part_size, or the failing I/O result.
+ */
+int ext4_block_writebytes(struct ext4_blockdev *bdev, u64int off,
+			  const void *buf, u32int len)
+{
+	const u8int *src = (void *)buf;
+	u64int pblk;	/* physical block being transferred */
+	u32int head;	/* offset of off inside its physical block */
+	u32int chunk;	/* bytes of the leading partial block */
+	u32int whole;	/* number of complete physical blocks */
+	int rc = 0;
+
+	assert(bdev && buf);
+	if (!bdev->bdif->ph_refctr) {
+		werrstr(Eio);
+		return -1;
+	}
+	/* Range check written so that off + len cannot wrap around. */
+	if (off > bdev->part_size || len > bdev->part_size - off) {
+		werrstr(Eoorop);
+		return -1;
+	}
+
+	/* NOTE(review): head ignores part_offset; presumably it is block aligned. */
+	pblk = (off + bdev->part_offset) / bdev->bdif->ph_bsize;
+	head = off & (bdev->bdif->ph_bsize - 1);
+
+	/* Leading partial block. */
+	if (head != 0) {
+		if (bdev->bdif->ph_bsize - head > len)
+			chunk = len;
+		else
+			chunk = bdev->bdif->ph_bsize - head;
+		rc = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+		if (rc != 0)
+			return rc;
+		memcpy(bdev->bdif->ph_bbuf + head, src, chunk);
+		rc = ext4_bdif_bwrite(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+		if (rc != 0)
+			return rc;
+		src += chunk;
+		len -= chunk;
+		pblk++;
+	}
+
+	/* Whole blocks go out directly from the caller's buffer. */
+	whole = len / bdev->bdif->ph_bsize;
+	if (whole != 0) {
+		rc = ext4_bdif_bwrite(bdev, src, pblk, whole);
+		if (rc != 0)
+			return rc;
+		src += bdev->bdif->ph_bsize * whole;
+		len -= bdev->bdif->ph_bsize * whole;
+		pblk += whole;
+	}
+
+	/* Trailing partial block. */
+	if (len != 0) {
+		rc = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+		if (rc != 0)
+			return rc;
+		memcpy(bdev->bdif->ph_bbuf, src, len);
+		rc = ext4_bdif_bwrite(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+	}
+	return rc;
+}
+
+/*
+ * Read len bytes from the partition at byte offset off into buf, bypassing
+ * the block cache.  A leading and a trailing partial physical block are
+ * read into the interface's block buffer ph_bbuf and copied out from
+ * there; whole blocks in between are read straight into buf.
+ * Returns 0 on success, -1 with Eio if the device is not open, -1 with
+ * Eoorop if the range does not fit in part_size, or the failing I/O result.
+ */
+int ext4_block_readbytes(struct ext4_blockdev *bdev, u64int off, void *buf,
+			 u32int len)
+{
+	u8int *dst = (void *)buf;
+	u64int pblk;	/* physical block being transferred */
+	u32int head;	/* offset of off inside its physical block */
+	u32int chunk;	/* bytes of the leading partial block */
+	u32int whole;	/* number of complete physical blocks */
+	int rc = 0;
+
+	assert(bdev && buf);
+	if (!bdev->bdif->ph_refctr) {
+		werrstr(Eio);
+		return -1;
+	}
+	/* Range check written so that off + len cannot wrap around. */
+	if (off > bdev->part_size || len > bdev->part_size - off) {
+		werrstr(Eoorop);
+		return -1;
+	}
+
+	/* NOTE(review): head ignores part_offset; presumably it is block aligned. */
+	pblk = (off + bdev->part_offset) / bdev->bdif->ph_bsize;
+	head = off & (bdev->bdif->ph_bsize - 1);
+
+	/* Leading partial block. */
+	if (head != 0) {
+		if (bdev->bdif->ph_bsize - head > len)
+			chunk = len;
+		else
+			chunk = bdev->bdif->ph_bsize - head;
+		rc = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+		if (rc != 0)
+			return rc;
+		memcpy(dst, bdev->bdif->ph_bbuf + head, chunk);
+		dst += chunk;
+		len -= chunk;
+		pblk++;
+	}
+
+	/* Whole blocks land directly in the caller's buffer. */
+	whole = len / bdev->bdif->ph_bsize;
+	if (whole != 0) {
+		rc = ext4_bdif_bread(bdev, dst, pblk, whole);
+		if (rc != 0)
+			return rc;
+		dst += bdev->bdif->ph_bsize * whole;
+		len -= bdev->bdif->ph_bsize * whole;
+		pblk += whole;
+	}
+
+	/* Trailing partial block. */
+	if (len != 0) {
+		rc = ext4_bdif_bread(bdev, bdev->bdif->ph_bbuf, pblk, 1);
+		if (rc != 0)
+			return rc;
+		memcpy(dst, bdev->bdif->ph_bbuf, len);
+	}
+	return rc;
+}
+
+/* Write out every buffer on the cache's dirty list, taking the first entry
+ * each time.  Returns 0 once the list is empty, or the first flush error. */
+int ext4_block_cache_flush(struct ext4_blockdev *bdev)
+{
+	struct ext4_buf *dirty;
+	int rc = 0;
+	while (rc == 0 && !SLIST_EMPTY(&bdev->bc->dirty_list)) {
+		dirty = SLIST_FIRST(&bdev->bc->dirty_list);
+		assert(dirty);
+		rc = ext4_block_flush_buf(bdev, dirty);
+	}
+	return rc;
+}
+
+/*
+ * Counting write-back switch: a non-zero on_off increments
+ * cache_write_back, zero decrements it unless it is already zero.  When
+ * the counter is zero afterwards, all delayed dirty buffers are flushed
+ * and that result is returned; otherwise returns 0.
+ */
+int ext4_block_cache_write_back(struct ext4_blockdev *bdev, u8int on_off)
+{
+	if (on_off)
+		bdev->cache_write_back++;
+	else if (bdev->cache_write_back)
+		bdev->cache_write_back--;
+	return bdev->cache_write_back ? 0 : ext4_block_cache_flush(bdev);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_crc32.c
@@ -1,0 +1,144 @@
+/* Based on FreeBSD. */
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_crc32.h"
+
+static const u32int crc32_tab[] = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+ 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+ 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
+ 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+ 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
+ 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+ 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
+ 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
+ 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
+ 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
+ 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
+ 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
+ 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
+ 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
+ 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
+ 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
+ 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
+ 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+/* */
+/* CRC LOOKUP TABLE */
+/* ================ */
+/* The following CRC lookup table was generated automagically */
+/* by the Rocksoft^tm Model CRC Algorithm Table Generation */
+/* Program V1.0 using the following model parameters: */
+/* */
+/* Width : 4 bytes. */
+/* Poly : 0x1EDC6F41L */
+/* Reverse : TRUE. */
+/* */
+/* For more information on the Rocksoft^tm Model CRC Algorithm, */
+/* see the document titled "A Painless Guide to CRC Error */
+/* Detection Algorithms" by Ross Williams */
+/* ([email protected].). This document is likely to be */
+/* in the FTP archive "ftp.adelaide.edu.au/pub/rocksoft". */
+/* */
+static const u32int crc32c_tab[256] = {
+ 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 0xC79A971FL,
+ 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, 0x8AD958CFL, 0x78B2DBCCL,
+ 0x6BE22838L, 0x9989AB3BL, 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L,
+ 0x5E133C24L, 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+ 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, 0x9A879FA0L,
+ 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, 0x5D1D08BFL, 0xAF768BBCL,
+ 0xBC267848L, 0x4E4DFB4BL, 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L,
+ 0x33ED7D2AL, 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+ 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, 0x6DFE410EL,
+ 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, 0x30E349B1L, 0xC288CAB2L,
+ 0xD1D83946L, 0x23B3BA45L, 0xF779DEAEL, 0x05125DADL, 0x1642AE59L,
+ 0xE4292D5AL, 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+ 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, 0x417B1DBCL,
+ 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, 0x86E18AA3L, 0x748A09A0L,
+ 0x67DAFA54L, 0x95B17957L, 0xCBA24573L, 0x39C9C670L, 0x2A993584L,
+ 0xD8F2B687L, 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+ 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, 0x96BF4DCCL,
+ 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, 0xDBFC821CL, 0x2997011FL,
+ 0x3AC7F2EBL, 0xC8AC71E8L, 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L,
+ 0x0F36E6F7L, 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+ 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, 0xEB1FCBADL,
+ 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, 0x2C855CB2L, 0xDEEEDFB1L,
+ 0xCDBE2C45L, 0x3FD5AF46L, 0x7198540DL, 0x83F3D70EL, 0x90A324FAL,
+ 0x62C8A7F9L, 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+ 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, 0x3CDB9BDDL,
+ 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, 0x82F63B78L, 0x709DB87BL,
+ 0x63CD4B8FL, 0x91A6C88CL, 0x456CAC67L, 0xB7072F64L, 0xA457DC90L,
+ 0x563C5F93L, 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+ 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, 0x92A8FC17L,
+ 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, 0x55326B08L, 0xA759E80BL,
+ 0xB4091BFFL, 0x466298FCL, 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL,
+ 0x0B21572CL, 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+ 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, 0x65D122B9L,
+ 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, 0x2892ED69L, 0xDAF96E6AL,
+ 0xC9A99D9EL, 0x3BC21E9DL, 0xEF087A76L, 0x1D63F975L, 0x0E330A81L,
+ 0xFC588982L, 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+ 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, 0x38CC2A06L,
+ 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, 0xFF56BD19L, 0x0D3D3E1AL,
+ 0x1E6DCDEEL, 0xEC064EEDL, 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L,
+ 0xD0DDD530L, 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+ 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, 0x8ECEE914L,
+ 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, 0xD3D3E1ABL, 0x21B862A8L,
+ 0x32E8915CL, 0xC083125FL, 0x144976B4L, 0xE622F5B7L, 0xF5720643L,
+ 0x07198540L, 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+ 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, 0xE330A81AL,
+ 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, 0x24AA3F05L, 0xD6C1BC06L,
+ 0xC5914FF2L, 0x37FACCF1L, 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L,
+ 0x7AB90321L, 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+ 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, 0x34F4F86AL,
+ 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, 0x79B737BAL, 0x8BDCB4B9L,
+ 0x988C474DL, 0x6AE7C44EL, 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L,
+ 0xAD7D5351L};
+
+/* Table-driven CRC step: folds size bytes of buf into crc, one lookup in tab per byte. */
+static inline u32int crc32(u32int crc, const void *buf, u32int size,
+			   const u32int *tab)
+{
+	const u8int *bytes = buf;
+	u32int n;
+	for (n = 0; n < size; n++)
+		crc = (crc >> 8) ^ tab[(crc ^ bytes[n]) & 0xFF];
+	return crc;
+}
+
+u32int ext4_crc32(u32int crc, const void *buf, u32int size)
+{
+ return crc32(crc, buf, size, crc32_tab);	/* continue crc over size bytes of buf using crc32_tab */
+}
+
+u32int ext4_crc32c(u32int crc, const void *buf, u32int size)
+{
+ return crc32(crc, buf, size, crc32c_tab);	/* continue crc over size bytes of buf using crc32c_tab */
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_debug.c
@@ -1,0 +1,21 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+
+static u32int debug_mask;	/* bit mask maintained by the ext4_dmask_* functions below */
+
+void ext4_dmask_set(u32int m)
+{
+	debug_mask = debug_mask | m;	/* turn on every bit that is set in m */
+}
+
+void ext4_dmask_clr(u32int m)
+{
+	debug_mask = debug_mask & ~m;	/* turn off every bit that is set in m */
+}
+
+u32int ext4_dmask_get(void)
+{
+ return debug_mask;	/* current value of the file-local mask */
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_dir.c
@@ -1,0 +1,649 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_trans.h"
+#include "ext4_dir.h"
+#include "ext4_dir_idx.h"
+#include "ext4_crc32.h"
+#include "ext4_inode.h"
+#include "ext4_fs.h"
+
+/*
+ * Locate the checksum "dirent" expected at the tail of the directory block
+ * starting at de.  Returns it only if its reserved fields are zero, its
+ * rec_len equals the tail size and its reserved_ft is
+ * EXT4_DIRENTRY_DIR_CSUM; otherwise returns nil.
+ */
+static struct ext4_dir_entry_tail *
+ext4_dir_get_tail(struct ext4_inode_ref *inode_ref,
+		  struct ext4_dir_en *de)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	struct ext4_dir_entry_tail *tail;
+	tail = EXT4_DIRENT_TAIL(de, ext4_sb_get_block_size(sb));
+	if (tail->reserved_zero1 == 0 && tail->reserved_zero2 == 0 &&
+	    to_le16(tail->rec_len) == sizeof(struct ext4_dir_entry_tail) &&
+	    tail->reserved_ft == EXT4_DIRENTRY_DIR_CSUM)
+		return tail;
+	return nil;
+}
+
+/*
+ * CRC32C over, in order: the filesystem UUID, the little-endian inode
+ * number, the little-endian inode generation, and the first size bytes of
+ * the directory block at dirent.
+ */
+static u32int ext4_dir_csum(struct ext4_inode_ref *inode_ref,
+			    struct ext4_dir_en *dirent, int size)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int le_ino = to_le32(inode_ref->index);
+	u32int le_gen = to_le32(ext4_inode_get_generation(inode_ref->inode));
+	u32int sum;
+
+	sum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	sum = ext4_crc32c(sum, &le_ino, sizeof(le_ino));
+	sum = ext4_crc32c(sum, &le_gen, sizeof(le_gen));
+	return ext4_crc32c(sum, dirent, size);
+}
+
+/*
+ * Check the checksum kept in the tail entry of a directory block.  True if
+ * the filesystem lacks EXT4_FRO_COM_METADATA_CSUM or the stored value
+ * matches; false if the block has no valid tail or the value differs.
+ */
+bool ext4_dir_csum_verify(struct ext4_inode_ref *inode_ref,
+			  struct ext4_dir_en *dirent)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	struct ext4_dir_entry_tail *tail;
+	intptr covered;	/* bytes in front of the tail */
+	u32int sum;
+	/* Without the feature there is nothing to verify. */
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+	tail = ext4_dir_get_tail(inode_ref, dirent);
+	if (!tail)
+		return false;	/* there is no space to hold the checksum */
+	covered = (char *)tail - (char *)dirent;
+	sum = ext4_dir_csum(inode_ref, dirent, covered);
+	return tail->checksum == to_le32(sum);
+}
+
+void ext4_dir_init_entry_tail(struct ext4_dir_entry_tail *t)
+{
+	memset(t, 0, sizeof *t);			/* every other field stays zero */
+	t->reserved_ft = EXT4_DIRENTRY_DIR_CSUM;	/* the value ext4_dir_get_tail checks for */
+	t->rec_len = to_le16(sizeof *t);		/* record length is the tail's own size */
+}
+
+/* Recompute the checksum of a directory block and store it in the tail entry.
+ * No-op without EXT4_FRO_COM_METADATA_CSUM or without a valid tail. */
+void ext4_dir_set_csum(struct ext4_inode_ref *inode_ref,
+		       struct ext4_dir_en *dirent)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	struct ext4_dir_entry_tail *tail;
+	intptr covered;	/* bytes in front of the tail */
+	u32int sum;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return;
+	tail = ext4_dir_get_tail(inode_ref, dirent);
+	if (!tail)
+		return;
+	covered = (char *)tail - (char *)dirent;
+	sum = ext4_dir_csum(inode_ref, dirent, covered);
+	tail->checksum = to_le32(sum);
+}
+
+/**@brief Validate the entry at the iterator's current offset and publish it.
+ * The entry must be 4-byte aligned, its 8-byte core and its whole record
+ * must lie inside the block, and its name must fit inside the record.
+ * @param it Iterator whose curr_off and curr_blk are already set up
+ * @param block_size Size of data block
+ * @return 0 with it->curr pointing at the entry, or -1 with the error
+ *         string set to "i/o error" and it->curr left nil
+ */
+static int ext4_dir_iterator_set(struct ext4_dir_iter *it,
+				 u32int block_size)
+{
+	struct ext4_sblock *sb = &it->inode_ref->fs->sb;
+	u32int pos = it->curr_off % block_size;	/* offset inside the block */
+	struct ext4_dir_en *cand;
+	u16int reclen;
+
+	/* Nothing is published until every check below has passed,
+	 * so a failure leaves the iterator without a current entry. */
+	it->curr = nil;
+
+	/* Ensure proper alignment, and that the core of the entry
+	 * does not overflow the block. */
+	if ((pos % 4) == 0 && pos <= block_size - 8) {
+		cand = (void *)(it->curr_blk.data + pos);
+		reclen = ext4_dir_en_get_entry_len(cand);
+
+		/* Ensure that the whole entry does not overflow the block
+		 * and that the name length is not too large. */
+		if (pos + reclen <= block_size &&
+		    ext4_dir_en_get_name_len(sb, cand) <= reclen - 8) {
+			/* Everything OK - "publish" the entry */
+			it->curr = cand;
+			return 0;
+		}
+	}
+
+	/* One of the checks failed. */
+	werrstr("i/o error");
+	return -1;
+}
+
+/* Release the block held by the iterator, if any, and forget its id
+ * whether or not the release succeeded.
+ * Returns 0 when nothing was held, else the result of ext4_block_set. */
+static int ext4_dir_iterator_put_blk(struct ext4_dir_iter *it)
+{
+	int rc;
+
+	if (it->curr_blk.lb_id == 0)
+		return 0;
+	rc = ext4_block_set(it->inode_ref->fs->bdev, &it->curr_blk);
+	it->curr_blk.lb_id = 0;
+	return rc;
+}
+
+/**@brief Seek to the directory entry at byte position pos.
+ * The data block is switched when pos lies in a different block than the
+ * current offset, or when no block is held.  At or past the end of the
+ * directory the held block is released and it->curr stays nil.
+ * @param it Initialized iterator
+ * @param pos Position of the next entry
+ * @return 0 on success (it->curr is nil at the end), otherwise the error of
+ *         the failing block or validation step
+ */
+static int ext4_dir_iterator_seek(struct ext4_dir_iter *it, u64int pos)
+{
+	struct ext4_sblock *sb = &it->inode_ref->fs->sb;
+	u64int dir_size = ext4_inode_get_size(sb, it->inode_ref->inode);
+	ext4_fsblk_t fblk;
+	u32int block_size, want;
+	u64int have;
+	int rc;
+
+	/* The iterator is not valid until we seek to the desired position */
+	it->curr = nil;
+
+	/* At the end: give the block back and only record the offset. */
+	if (pos >= dir_size) {
+		rc = ext4_dir_iterator_put_blk(it);
+		if (rc != 0)
+			return rc;
+		it->curr_off = pos;
+		return 0;
+	}
+
+	block_size = ext4_sb_get_block_size(sb);
+	have = it->curr_off / block_size;	/* block of the old offset */
+	want = (u32int)(pos / block_size);	/* block of the new offset */
+
+	/* No block held, or crossing a block boundary: load the wanted block. */
+	if (it->curr_blk.lb_id == 0 || have != want) {
+		rc = ext4_dir_iterator_put_blk(it);
+		if (rc != 0)
+			return rc;
+		rc = ext4_fs_get_inode_dblk_idx(it->inode_ref, want, &fblk, false);
+		if (rc != 0)
+			return rc;
+		rc = ext4_trans_block_get(it->inode_ref->fs->bdev, &it->curr_blk, fblk);
+		if (rc != 0) {
+			it->curr_blk.lb_id = 0;
+			return rc;
+		}
+	}
+
+	it->curr_off = pos;
+	return ext4_dir_iterator_set(it, block_size);
+}
+
+/* Reset it for directory inode_ref and seek to byte offset pos; returns the seek result. */
+int ext4_dir_iterator_init(struct ext4_dir_iter *it,
+			   struct ext4_inode_ref *inode_ref, u64int pos)
+{
+	it->curr_blk.lb_id = 0;	/* no block held yet */
+	it->curr_off = 0;
+	it->curr = nil;
+	it->inode_ref = inode_ref;
+	return ext4_dir_iterator_seek(it, pos);
+}
+
+/*
+ * Advance to the next directory entry whose inode number is non-zero.
+ * Each step skips the record length of the current entry, so it->curr
+ * must be valid on entry.  Stops when the seek fails or leaves it->curr
+ * nil (as it does at the end of the directory).  Returns the last seek
+ * result.
+ */
+int ext4_dir_iterator_next(struct ext4_dir_iter *it)
+{
+	u16int reclen;
+	int rc;
+
+	do {
+		reclen = ext4_dir_en_get_entry_len(it->curr);
+		rc = ext4_dir_iterator_seek(it, it->curr_off + reclen);
+	} while (rc == 0 && it->curr && ext4_dir_en_get_inode(it->curr) == 0);
+	return rc;
+}
+
+/* Invalidate the iterator and release the block it still holds, if any.
+ * Returns 0 when no block is held, else the result of ext4_block_set. */
+int ext4_dir_iterator_fini(struct ext4_dir_iter *it)
+{
+	it->curr = nil;
+	if (it->curr_blk.lb_id == 0)
+		return 0;
+	return ext4_block_set(it->inode_ref->fs->bdev, &it->curr_blk);
+}
+
+/*
+ * Fill in directory entry en for child: entry type derived from the child's
+ * inode type (EXT4_DE_UNKNOWN when unrecognised), inode number, record
+ * length entry_len, name length and the name_len bytes of name.
+ * No terminator is written after the name.
+ * entry_len must not exceed the block size (asserted).
+ * The caller provides en; nothing is allocated here.
+ */
+void ext4_dir_write_entry(struct ext4_sblock *sb, struct ext4_dir_en *en,
+			  u16int entry_len, struct ext4_inode_ref *child,
+			  const char *name, usize name_len)
+{
+	u8int ftype;
+
+	/* Check maximum entry length */
+	assert(entry_len <= ext4_sb_get_block_size(sb));
+
+	/* Translate the inode type into the directory entry type. */
+	switch (ext4_inode_type(sb, child->inode)) {
+	case EXT4_INODE_MODE_DIRECTORY:	ftype = EXT4_DE_DIR; break;
+	case EXT4_INODE_MODE_FILE:	ftype = EXT4_DE_REG_FILE; break;
+	case EXT4_INODE_MODE_SOFTLINK:	ftype = EXT4_DE_SYMLINK; break;
+	case EXT4_INODE_MODE_CHARDEV:	ftype = EXT4_DE_CHRDEV; break;
+	case EXT4_INODE_MODE_BLOCKDEV:	ftype = EXT4_DE_BLKDEV; break;
+	case EXT4_INODE_MODE_FIFO:	ftype = EXT4_DE_FIFO; break;
+	case EXT4_INODE_MODE_SOCKET:	ftype = EXT4_DE_SOCK; break;
+	default:
+		/* FIXME: unsupported filetype */
+		ftype = EXT4_DE_UNKNOWN;
+		break;
+	}
+
+	/* Set basic attributes.  The setters run in a fixed order:
+	 * entry type first, then inode, record length and name length. */
+	ext4_dir_en_set_inode_type(sb, en, ftype);
+	ext4_dir_en_set_inode(en, child->index);
+	ext4_dir_en_set_entry_len(en, entry_len);
+	ext4_dir_en_set_name_len(sb, en, (u16int)name_len);
+
+	/* Write name */
+	memcpy(en->name, name, name_len);
+}
+
+/*
+ * Add an entry called name (name_len bytes) for child to directory parent.
+ * Tries the dx index first when the filesystem feature and the inode flag are
+ * both set, then every existing data block in turn, and finally appends a new
+ * data block that holds only the new entry.  Returns 0 on success.
+ */
+int ext4_dir_add_entry(struct ext4_inode_ref *parent, const char *name,
+ u32int name_len, struct ext4_inode_ref *child)
+{
+ int r;
+ struct ext4_fs *fs = parent->fs;
+ struct ext4_sblock *sb = &parent->fs->sb;
+
+ /* Index adding (if allowed) */
+ if ((ext4_sb_feature_com(sb, EXT4_FCOM_DIR_INDEX)) &&
+ (ext4_inode_has_flag(parent->inode, EXT4_INODE_FLAG_INDEX))) {
+ r = ext4_dir_dx_add_entry(parent, child, name, name_len);
+ /* Check if index is corrupted */
+ if (r == EXT4_ERR_BAD_DX_DIR) {
+ /* Needed to clear dir index flag if corrupted */
+ ext4_inode_clear_flag(parent->inode, EXT4_INODE_FLAG_INDEX);
+ parent->dirty = true;
+ } else if (r == 0) {
+ return 0;
+ }
+ }
+ /* Unless the indexed add succeeded, fall through to the linear scan. */
+ /* Linear algorithm */
+ u32int iblock = 0;
+ ext4_fsblk_t fblock = 0;
+ u32int block_size = ext4_sb_get_block_size(sb);
+ u64int inode_size = ext4_inode_get_size(sb, parent->inode);
+ u32int total_blocks = (u32int)(inode_size / block_size);
+
+ /* Find block, where is space for new entry and try to add */
+ bool success = false;
+ for (iblock = 0; iblock < total_blocks; ++iblock) {
+ r = ext4_fs_get_inode_dblk_idx(parent, iblock, &fblock, false);
+ if (r != 0)
+ return r;
+ struct ext4_block block;
+ r = ext4_trans_block_get(fs->bdev, &block, fblock);
+ if (r != 0)
+ return r;
+ if (!ext4_dir_csum_verify(parent, (void *)block.data)) {
+ ext4_dbg(DEBUG_DIR,
+ DBG_WARN "Leaf block checksum failed."
+ "Inode: %ud, "
+ "Block: %ud\n",
+ parent->index,
+ iblock);
+ }
+
+ /* If adding is successful, function can finish */
+ r = ext4_dir_try_insert_entry(sb, parent, &block, child,
+ name, name_len);
+ if (r == 0)
+ success = true;
+ r = ext4_block_set(fs->bdev, &block);
+ if (r != 0)
+ return r;
+
+ if (success)
+ return 0;
+ }
+
+ /* No free block found - needed to allocate next data block */
+ iblock = 0;
+ fblock = 0;
+ r = ext4_fs_append_inode_dblk(parent, &fblock, &iblock);
+ if (r != 0)
+ return r;
+
+ /* Load new block */
+ struct ext4_block b;
+ r = ext4_trans_block_get_noread(fs->bdev, &b, fblock);
+ if (r != 0)
+ return r;
+
+ /* Fill block with zeroes */
+ memset(b.data, 0, block_size);
+ struct ext4_dir_en *blk_en = (void *)b.data;
+
+ /* Save new block */
+ if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+ u16int el = block_size - sizeof(struct ext4_dir_entry_tail);
+ ext4_dir_write_entry(sb, blk_en, el, child, name, name_len);
+ ext4_dir_init_entry_tail(EXT4_DIRENT_TAIL(b.data, block_size));
+ } else {
+ ext4_dir_write_entry(sb, blk_en, block_size, child, name,
+ name_len);
+ }
+
+ ext4_dir_set_csum(parent, (void *)b.data);
+ ext4_trans_set_block_dirty(b.buf);
+ r = ext4_block_set(fs->bdev, &b);
+ return r;
+}
+
+/*
+ * Look up name (name_len bytes) in directory parent: first through the dx index
+ * if the feature and the inode flag are set, then, unless that succeeded, by
+ * scanning every data block.  On success returns 0 with result->dentry set;
+ * result->block is not released here.  Returns EXT4_ERR_NOT_FOUND otherwise.
+ */
+int ext4_dir_find_entry(struct ext4_dir_search_result *result,
+ struct ext4_inode_ref *parent, const char *name,
+ u32int name_len)
+{
+ int r;
+ struct ext4_sblock *sb = &parent->fs->sb;
+ /* Entry clear */
+ result->block.lb_id = 0;
+ result->dentry = nil;
+
+ /* Index search */
+ if ((ext4_sb_feature_com(sb, EXT4_FCOM_DIR_INDEX)) &&
+ (ext4_inode_has_flag(parent->inode, EXT4_INODE_FLAG_INDEX))) {
+ r = ext4_dir_dx_find_entry(result, parent, name_len, name);
+ /* Check if index is corrupted */
+ if (r == EXT4_ERR_BAD_DX_DIR) {
+ /* Needed to clear dir index flag if corrupted */
+ ext4_inode_clear_flag(parent->inode, EXT4_INODE_FLAG_INDEX);
+ parent->dirty = true;
+ } else if (r == 0) {
+ return 0;
+ }
+ }
+
+ /* Linear algorithm */
+ u32int iblock;
+ ext4_fsblk_t fblock;
+ u32int block_size = ext4_sb_get_block_size(sb);
+ u64int inode_size = ext4_inode_get_size(sb, parent->inode);
+ u32int total_blocks = (u32int)(inode_size / block_size);
+
+ /* Walk through all data blocks */
+ for (iblock = 0; iblock < total_blocks; ++iblock) {
+ /* Load block address */
+ r = ext4_fs_get_inode_dblk_idx(parent, iblock, &fblock, false);
+ if (r != 0)
+ return r;
+ /* Load data block */
+ struct ext4_block b;
+ r = ext4_trans_block_get(parent->fs->bdev, &b, fblock);
+ if (r != 0){
+ werrstr("ext4_trans_block_get: %r");
+ return r;
+ }
+ if (!ext4_dir_csum_verify(parent, (void *)b.data)) {
+ ext4_dbg(DEBUG_DIR,
+ DBG_WARN "Leaf block checksum failed."
+ "Inode: %ud, "
+ "Block: %ud\n",
+ parent->index,
+ iblock);
+ }
+ /* Try to find entry in block */
+ struct ext4_dir_en *res_entry;
+ r = ext4_dir_find_in_block(&b, sb, name_len, name, &res_entry);
+ if (r == 0) {
+ result->block = b;
+ result->dentry = res_entry;
+ return 0;
+ }
+
+ /* Entry not found - put block and continue to the next block */
+ r = ext4_block_set(parent->fs->bdev, &b);
+ if (r != 0)
+ return r;
+ }
+
+ return EXT4_ERR_NOT_FOUND;
+}
+
+/**@brief Remove the entry called name from a directory.
+ *        The entry is invalidated by zeroing its i-node number; unless it
+ *        is the first record of its block, its space is merged into the
+ *        record that precedes it.
+ *        The predecessor is located before anything is modified, so a
+ *        damaged record chain (zero-length record, or records that do not
+ *        line up with the entry) is reported as an error instead of
+ *        looping forever or tripping an assertion.
+ * @param parent   Directory i-node
+ * @param name     Name of the entry to remove
+ * @param name_len Length of name in bytes
+ * @return 0 on success, -1 with errstr set when parent is not a directory
+ *         or the block is corrupt, otherwise the error returned by the
+ *         lookup or by releasing the block
+ */
+int ext4_dir_remove_entry(struct ext4_inode_ref *parent, const char *name,
+			  u32int name_len)
+{
+	struct ext4_sblock *sb = &parent->fs->sb;
+	struct ext4_dir_search_result result;
+	struct ext4_dir_en *prev = nil;
+	u16int prev_len = 0;
+	u32int pos;
+	int rc;
+
+	/* Check if removing from directory */
+	if (!ext4_inode_is_type(sb, parent->inode, EXT4_INODE_MODE_DIRECTORY)) {
+		werrstr("not a directory");
+		return -1;
+	}
+
+	/* Try to find entry */
+	rc = ext4_dir_find_entry(&result, parent, name, name_len);
+	if (rc != 0)
+		return rc;
+
+	/* Entry position in block */
+	pos = (u8int *)result.dentry - result.block.data;
+
+	/*
+	 * If entry is not the first in block, it must be merged
+	 * with previous entry: find its direct predecessor first.
+	 */
+	if (pos != 0) {
+		u32int offset = 0;
+
+		prev = (void *)result.block.data;
+		prev_len = ext4_dir_en_get_entry_len(prev);
+
+		/* A zero length would never advance; stop and report it below */
+		while (prev_len != 0 && (offset + prev_len) < pos) {
+			offset += prev_len;
+			prev = (void *)(result.block.data + offset);
+			prev_len = ext4_dir_en_get_entry_len(prev);
+		}
+
+		if (prev_len == 0 || (offset + prev_len) != pos) {
+			/* Nothing has been modified yet, just drop the block */
+			ext4_dir_destroy_result(parent, &result);
+			werrstr("corrupt entry");
+			return -1;
+		}
+	}
+
+	/* Invalidate entry */
+	ext4_dir_en_set_inode(result.dentry, 0);
+
+	/* Add removed entry length to predecessor's length */
+	if (prev != nil) {
+		u16int del_len = ext4_dir_en_get_entry_len(result.dentry);
+		ext4_dir_en_set_entry_len(prev, prev_len + del_len);
+	}
+
+	ext4_dir_set_csum(parent,
+			  (struct ext4_dir_en *)result.block.data);
+	ext4_trans_set_block_dirty(result.block.buf);
+
+	return ext4_dir_destroy_result(parent, &result);
+}
+
+/**@brief Try to place a new entry inside one directory block.
+ *        The block is walked record by record. An unused record that is
+ *        large enough is overwritten; a used record with enough slack
+ *        after its name is split and the new entry written into the tail.
+ *        On success the block checksum is refreshed and the buffer is
+ *        marked dirty; the block is not released here.
+ * @param sb        Superblock
+ * @param inode_ref Directory i-node (used for the checksum)
+ * @param dst_blk   Block to insert into
+ * @param child     I-node the new entry refers to
+ * @param name      Name of the new entry
+ * @param name_len  Length of name in bytes
+ * @return 0 on success, -1 with errstr "no space" when the block is full
+ *         or "corrupt entry" when a zero-length record is met
+ */
+int ext4_dir_try_insert_entry(struct ext4_sblock *sb,
+			      struct ext4_inode_ref *inode_ref,
+			      struct ext4_block *dst_blk,
+			      struct ext4_inode_ref *child, const char *name,
+			      u32int name_len)
+{
+	/* Compute required length entry and align it to 4 bytes */
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u16int required_len = sizeof(struct ext4_fake_dir_entry) + name_len;
+
+	if ((required_len % 4) != 0)
+		required_len += 4 - (required_len % 4);
+
+	/* Initialize pointers, stop means to upper bound */
+	struct ext4_dir_en *start = (void *)dst_blk->data;
+	struct ext4_dir_en *stop = (void *)(dst_blk->data + block_size);
+
+	/*
+	 * Walk through the block and check for invalid entries
+	 * or entries with free space for new entry
+	 */
+	while (start < stop) {
+		u32int inode = ext4_dir_en_get_inode(start);
+		u16int rec_len = ext4_dir_en_get_entry_len(start);
+		u8int itype = ext4_dir_en_get_inode_type(sb, start);
+
+		/*
+		 * A zero-length record can never be stepped over; bail out
+		 * the same way ext4_dir_find_in_block does.
+		 */
+		if (rec_len == 0) {
+			werrstr("corrupt entry");
+			return -1;
+		}
+
+		/* If invalid and large enough entry, use it */
+		if ((inode == 0) && (itype != EXT4_DIRENTRY_DIR_CSUM) &&
+		    (rec_len >= required_len)) {
+			ext4_dir_write_entry(sb, start, rec_len, child, name,
+					     name_len);
+			ext4_dir_set_csum(inode_ref, (void *)dst_blk->data);
+			ext4_trans_set_block_dirty(dst_blk->buf);
+
+			return 0;
+		}
+
+		/* Valid entry, try to split it */
+		if (inode != 0) {
+			u16int used_len;
+			used_len = ext4_dir_en_get_name_len(sb, start);
+
+			u16int sz;
+			sz = sizeof(struct ext4_fake_dir_entry) + used_len;
+
+			if ((used_len % 4) != 0)
+				sz += 4 - (used_len % 4);
+
+			/*
+			 * A record shorter than its own name is damaged:
+			 * treat it as having no slack instead of letting
+			 * the unsigned subtraction wrap around.
+			 */
+			u16int free_space = (rec_len >= sz) ? rec_len - sz : 0;
+
+			/* There is free space for new entry */
+			if (free_space >= required_len) {
+				/* Cut tail of current entry */
+				struct ext4_dir_en *new_entry;
+				new_entry = (void *)((u8int *)start + sz);
+				ext4_dir_en_set_entry_len(start, sz);
+				ext4_dir_write_entry(sb, new_entry, free_space,
+						     child, name, name_len);
+
+				ext4_dir_set_csum(inode_ref,
+						  (void *)dst_blk->data);
+				ext4_trans_set_block_dirty(dst_blk->buf);
+				return 0;
+			}
+		}
+
+		/* Jump to the next entry */
+		start = (void *)((u8int *)start + rec_len);
+	}
+
+	/* No free space found for new entry */
+	werrstr("no space");
+	return -1;
+}
+
+/**@brief Search a single directory block for a name.
+ * @param block     Block to search
+ * @param sb        Superblock
+ * @param name_len  Length of name in bytes
+ * @param name      Name to look for
+ * @param res_entry Receives a pointer to the matching entry inside the
+ *                  block; written only on success
+ * @return 0 when found, EXT4_ERR_NOT_FOUND when the block holds no match,
+ *         -1 with errstr "corrupt entry" on a zero-length record
+ */
+int ext4_dir_find_in_block(struct ext4_block *block, struct ext4_sblock *sb,
+			   usize name_len, const char *name,
+			   struct ext4_dir_en **res_entry)
+{
+	u8int *cur = block->data;
+	u8int *end = block->data + ext4_sb_get_block_size(sb);
+
+	/*
+	 * Stop at the end of the block, or as soon as a name of the wanted
+	 * length could no longer fit in what is left.
+	 */
+	while (cur < end && cur + name_len <= end) {
+		struct ext4_dir_en *de = (struct ext4_dir_en *)cur;
+
+		/* Only records with a non-zero i-node are live */
+		if (ext4_dir_en_get_inode(de) != 0) {
+			/* Cheap length test first, byte compare second */
+			int el = ext4_dir_en_get_name_len(sb, de);
+			if (el == name_len &&
+			    memcmp(name, de->name, name_len) == 0) {
+				*res_entry = de;
+				return 0;
+			}
+		}
+
+		u16int step = ext4_dir_en_get_entry_len(de);
+		if (step == 0) {
+			werrstr("corrupt entry");
+			return -1;
+		}
+
+		/* Advance to the following record */
+		cur += step;
+	}
+
+	/* Entry not found */
+	return EXT4_ERR_NOT_FOUND;
+}
+
+/**@brief Release the block referenced by a search result, if any.
+ * @param parent Directory i-node the result was obtained from
+ * @param result Search result; a zero block.lb_id means nothing is held
+ * @return 0 when nothing is held, otherwise the result of ext4_block_set()
+ */
+int ext4_dir_destroy_result(struct ext4_inode_ref *parent,
+			    struct ext4_dir_search_result *result)
+{
+	if (result->block.lb_id == 0)
+		return 0;
+
+	return ext4_block_set(parent->fs->bdev, &result->block);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_dir_idx.c
@@ -1,0 +1,1356 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_trans.h"
+#include "ext4_dir_idx.h"
+#include "ext4_dir.h"
+#include "ext4_blockdev.h"
+#include "ext4_fs.h"
+#include "ext4_super.h"
+#include "ext4_inode.h"
+#include "ext4_crc32.h"
+#include "ext4_hash.h"
+
+/**@brief Read the hash version stored in the index root info.
+ * @param ri Pointer to root info structure of index
+ * @return Value of the hash_version field
+ */
+static inline u8int
+ext4_dir_dx_rinfo_get_hash_version(struct ext4_dir_idx_rinfo *ri)
+{
+	u8int version = ri->hash_version;
+
+	return version;
+}
+
+/**@brief Store the hash version in the index root info.
+ * @param ri Pointer to root info structure of index
+ * @param v  Hash algorithm version to record
+ */
+static inline void
+ext4_dir_dx_rinfo_set_hash_version(struct ext4_dir_idx_rinfo *ri, u8int v)
+{
+	/* Single-byte field, stored as given */
+	ri->hash_version = v;
+}
+
+/**@brief Read the recorded length of the root info structure.
+ * @param ri Pointer to root info structure of index
+ * @return Value of the info_length field
+ */
+static inline u8int
+ext4_dir_dx_rinfo_get_info_length(struct ext4_dir_idx_rinfo *ri)
+{
+	u8int length = ri->info_length;
+
+	return length;
+}
+
+/**@brief Record the length of the root info structure.
+ * @param ri  Pointer to root info structure of index
+ * @param len Length of the structure in bytes
+ */
+static inline void
+ext4_dir_dx_root_info_set_info_length(struct ext4_dir_idx_rinfo *ri,
+				      u8int len)
+{
+	/* Single-byte field, stored as given */
+	ri->info_length = len;
+}
+
+/**@brief Read the number of indirect levels of the HTree.
+ * @param ri Pointer to root info structure of index
+ * @return Value of the indirect_levels field
+ */
+static inline u8int
+ext4_dir_dx_rinfo_get_indirect_levels(struct ext4_dir_idx_rinfo *ri)
+{
+	u8int levels = ri->indirect_levels;
+
+	return levels;
+}
+
+/**@brief Record the number of indirect levels of the HTree.
+ * @param ri Pointer to root info structure of index
+ * @param l  Number of indirect levels
+ */
+static inline void
+ext4_dir_dx_rinfo_set_indirect_levels(struct ext4_dir_idx_rinfo *ri, u8int l)
+{
+	/* Single-byte field, stored as given */
+	ri->indirect_levels = l;
+}
+
+/**@brief Read the capacity of an index node.
+ * @param climit Pointer to count/limit structure
+ * @return The limit field, passed through to_le16()
+ */
+static inline u16int
+ext4_dir_dx_climit_get_limit(struct ext4_dir_idx_climit *climit)
+{
+	u16int raw = climit->limit;
+
+	return to_le16(raw);
+}
+
+/**@brief Store the capacity of an index node.
+ * @param climit Pointer to count/limit structure
+ * @param limit  Maximum number of entries in the node
+ */
+static inline void
+ext4_dir_dx_climit_set_limit(struct ext4_dir_idx_climit *climit, u16int limit)
+{
+	u16int raw = to_le16(limit);
+
+	climit->limit = raw;
+}
+
+/**@brief Read how many entries an index node currently holds.
+ * @param climit Pointer to count/limit structure
+ * @return The count field, passed through to_le16()
+ */
+static inline u16int
+ext4_dir_dx_climit_get_count(struct ext4_dir_idx_climit *climit)
+{
+	u16int raw = climit->count;
+
+	return to_le16(raw);
+}
+
+/**@brief Store how many entries an index node currently holds.
+ * @param climit Pointer to count/limit structure
+ * @param count  Number of entries in the node
+ */
+static inline void
+ext4_dir_dx_climit_set_count(struct ext4_dir_idx_climit *climit, u16int count)
+{
+	u16int raw = to_le16(count);
+
+	climit->count = raw;
+}
+
+/**@brief Read the hash value of an index entry.
+ * @param entry Pointer to index entry
+ * @return The hash field, passed through to_le32()
+ */
+static inline u32int
+ext4_dir_dx_entry_get_hash(struct ext4_dir_idx_entry *entry)
+{
+	u32int raw = entry->hash;
+
+	return to_le32(raw);
+}
+
+/**@brief Store the hash value of an index entry.
+ * @param entry Pointer to index entry
+ * @param hash  Hash value
+ */
+static inline void
+ext4_dir_dx_entry_set_hash(struct ext4_dir_idx_entry *entry, u32int hash)
+{
+	u32int raw = to_le32(hash);
+
+	entry->hash = raw;
+}
+
+/**@brief Read the block number of the child an index entry points to.
+ *        Callers in this file treat it as a logical block of the
+ *        directory and map it with ext4_fs_get_inode_dblk_idx().
+ * @param entry Pointer to index entry
+ * @return The block field, passed through to_le32()
+ */
+static inline u32int
+ext4_dir_dx_entry_get_block(struct ext4_dir_idx_entry *entry)
+{
+	u32int raw = entry->block;
+
+	return to_le32(raw);
+}
+
+/**@brief Store the block number of the child an index entry points to.
+ * @param entry Pointer to index entry
+ * @param block Block number of the child node
+ */
+static inline void
+ext4_dir_dx_entry_set_block(struct ext4_dir_idx_entry *entry, u32int block)
+{
+	u32int raw = to_le32(block);
+
+	entry->block = raw;
+}
+
+/**@brief Sort entry item.
+ * One element of the array sorted by hash when a directory block is
+ * split (see ext4_dir_dx_entry_comparator).
+ */
+struct ext4_dx_sort_entry {
+ /* Hash of the entry name; the sort key */
+ u32int hash;
+ /* Length in bytes of the copied record */
+ u32int rec_len;
+ /* Pointer to the copy of the directory entry */
+ void *dentry;
+};
+
+/**@brief Hash a name with the parameters held in hinfo.
+ *        The results are written to hinfo->hash and hinfo->minor_hash.
+ * @param hinfo Hash info supplying seed and hash version
+ * @param len   Length of name in bytes
+ * @param name  Name to hash
+ * @return Result of ext2_htree_hash()
+ */
+static int ext4_dir_dx_hash_string(struct ext4_hash_info *hinfo, int len,
+				   const char *name)
+{
+	int rc;
+
+	rc = ext2_htree_hash(name, len, hinfo->seed, hinfo->hash_version,
+			     &hinfo->hash, &hinfo->minor_hash);
+	return rc;
+}
+
+/**@brief Compute the crc32c checksum of an HTree index node.
+ *        The checksum is chained over the filesystem uuid, the i-node
+ *        number, the i-node generation, the block up to and including
+ *        the entries in use, and finally the tail with its checksum
+ *        field temporarily zeroed.
+ * @param inode_ref    Directory i-node owning the node
+ * @param de           Start of the index block
+ * @param count_offset Byte offset of the count/limit header in the block
+ * @param count        Number of index entries in use
+ * @param t            Tail structure of the node; restored before return
+ * @return Checksum, or 0 when the filesystem has no metadata checksums
+ */
+static u32int ext4_dir_dx_checksum(struct ext4_inode_ref *inode_ref, void *de,
+				   int count_offset, int count,
+				   struct ext4_dir_idx_tail *t)
+{
+	u32int saved_csum, csum = 0;
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	int sz;
+
+	/* Compute the checksum only if the filesystem supports it */
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		u32int ino_index = to_le32(inode_ref->index);
+		u32int ino_gen;
+		ino_gen = to_le32(ext4_inode_get_generation(inode_ref->inode));
+
+		/*
+		 * The covered area is the header plus `count` index entries,
+		 * so the element size must be that of an index entry, not of
+		 * the tail structure.
+		 */
+		sz = count_offset + (count * sizeof(struct ext4_dir_idx_entry));
+		saved_csum = t->checksum;
+		t->checksum = 0;
+		/* First calculate crc32 checksum against fs uuid */
+		csum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+		/* Then calculate crc32 checksum against inode number
+		 * and inode generation */
+		csum = ext4_crc32c(csum, &ino_index, sizeof(ino_index));
+		csum = ext4_crc32c(csum, &ino_gen, sizeof(ino_gen));
+		/* After that calculate crc32 checksum against all the dx_entry */
+		csum = ext4_crc32c(csum, de, sz);
+		/* Finally calculate crc32 checksum for dx_tail */
+		csum = ext4_crc32c(csum, t, sizeof(struct ext4_dir_idx_tail));
+		t->checksum = saved_csum;
+	}
+	return csum;
+}
+
+/**@brief Locate the count/limit header inside an index block.
+ *        A first record of length 12 is taken as the "." entry of an index
+ *        root, whose header sits at byte 32; a first record spanning the
+ *        whole block is taken as an interior node, whose header sits at
+ *        byte 8. Anything else is rejected.
+ * @param inode_ref Directory i-node (used to reach the superblock)
+ * @param dirent    Start of the index block
+ * @param offset    If not nil, receives the byte offset of the header
+ * @return Pointer to the header, or nil when the block does not look like
+ *         an index root or node
+ */
+static struct ext4_dir_idx_climit *
+ext4_dir_dx_get_climit(struct ext4_inode_ref *inode_ref,
+		       struct ext4_dir_en *dirent, int *offset)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int bsize = ext4_sb_get_block_size(sb);
+	u16int first_len = ext4_dir_en_get_entry_len(dirent);
+	int where;
+
+	if (first_len == 12) {
+		struct ext4_dir_idx_root *root = (struct ext4_dir_idx_root *)dirent;
+		struct ext4_dir_en *dotdot = (struct ext4_dir_en *)&root->dots[1];
+
+		/* ".." must cover the rest of the block and the info be sane */
+		if (ext4_dir_en_get_entry_len(dotdot) != (bsize - 12) ||
+		    root->info.reserved_zero ||
+		    root->info.info_length != sizeof(struct ext4_dir_idx_rinfo))
+			return nil;
+
+		where = 32;
+	} else if (first_len == bsize) {
+		where = 8;
+	} else {
+		return nil;
+	}
+
+	if (offset)
+		*offset = where;
+
+	return (struct ext4_dir_idx_climit *)(((char *)dirent) + where);
+}
+
+/**@brief Check the stored checksum of an HTree index block.
+ *        The check is skipped, and the block accepted, when the filesystem
+ *        has no metadata checksums, when the count/limit header cannot be
+ *        located, or when the node leaves no room for a tail.
+ * @param inode_ref Directory i-node owning the block
+ * @param de        Start of the index block
+ * @return false only when a checksum is present and does not match
+ */
+static bool ext4_dir_dx_csum_verify(struct ext4_inode_ref *inode_ref,
+				    struct ext4_dir_en *de)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int block_size = ext4_sb_get_block_size(sb);
+	struct ext4_dir_idx_climit *climit;
+	struct ext4_dir_idx_tail *tail;
+	int coff, limit, cnt;
+	u32int expect;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+
+	climit = ext4_dir_dx_get_climit(inode_ref, de, &coff);
+	if (climit == nil) {
+		/* Directory seems corrupted. */
+		return true;
+	}
+
+	limit = ext4_dir_dx_climit_get_limit(climit);
+	cnt = ext4_dir_dx_climit_get_count(climit);
+	if (coff + (limit * sizeof(struct ext4_dir_idx_entry)) >
+	    (block_size - sizeof(struct ext4_dir_idx_tail))) {
+		/* There is no space to hold the checksum */
+		return true;
+	}
+
+	/* The tail follows the last possible entry slot */
+	tail = (void *)(((struct ext4_dir_idx_entry *)climit) + limit);
+	expect = to_le32(ext4_dir_dx_checksum(inode_ref, de, coff, cnt, tail));
+
+	return tail->checksum == expect;
+}
+
+
+/**@brief Recompute and store the checksum of an HTree index block.
+ *        Nothing is written when the filesystem has no metadata checksums,
+ *        when the count/limit header cannot be located, or when the node
+ *        leaves no room for a tail.
+ * @param inode_ref Directory i-node owning the block
+ * @param dirent    Start of the index block
+ */
+static void ext4_dir_set_dx_csum(struct ext4_inode_ref *inode_ref,
+				 struct ext4_dir_en *dirent)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int block_size = ext4_sb_get_block_size(sb);
+	struct ext4_dir_idx_climit *climit;
+	struct ext4_dir_idx_tail *tail;
+	int coff, limit, count;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return;
+
+	climit = ext4_dir_dx_get_climit(inode_ref, dirent, &coff);
+	if (climit == nil) {
+		/* Directory seems corrupted. */
+		return;
+	}
+
+	limit = ext4_dir_dx_climit_get_limit(climit);
+	count = ext4_dir_dx_climit_get_count(climit);
+	if (coff + (limit * sizeof(struct ext4_dir_idx_entry)) >
+	    (block_size - sizeof(struct ext4_dir_idx_tail))) {
+		/* There is no space to hold the checksum */
+		return;
+	}
+
+	/* The tail follows the last possible entry slot */
+	tail = (void *)(((struct ext4_dir_idx_entry *)climit) + limit);
+	tail->checksum = to_le32(ext4_dir_dx_checksum(inode_ref, dirent,
+						      coff, count, tail));
+}
+
+/****************************************************************************/
+
+/**@brief Initialise an HTree index in a directory.
+ *        Logical block 0 becomes the index root: it holds the "." and ".."
+ *        entries, the root info and a single index entry. Logical block 1
+ *        becomes an empty leaf that the index entry points to.
+ *        The leaf is fully written, including its zero i-node number,
+ *        before its checksum is computed, so the stored checksum matches
+ *        the data that reaches the disk.
+ * @param dir    Directory i-node to initialise
+ * @param parent I-node the ".." entry refers to
+ * @return 0 on success, otherwise the error of the failing block operation
+ */
+int ext4_dir_dx_init(struct ext4_inode_ref *dir, struct ext4_inode_ref *parent)
+{
+	/* Load block 0, where will be index root located */
+	ext4_fsblk_t fblock;
+	u32int iblock = 0;
+	bool need_append =
+		(ext4_inode_get_size(&dir->fs->sb, dir->inode)
+			< EXT4_DIR_DX_INIT_BCNT)
+		? true : false;
+	struct ext4_sblock *sb = &dir->fs->sb;
+	u32int block_size = ext4_sb_get_block_size(&dir->fs->sb);
+	struct ext4_block block;
+
+	int rc;
+
+	if (!need_append)
+		rc = ext4_fs_init_inode_dblk_idx(dir, iblock, &fblock);
+	else
+		rc = ext4_fs_append_inode_dblk(dir, &fblock, &iblock);
+
+	if (rc != 0)
+		return rc;
+
+	rc = ext4_trans_block_get_noread(dir->fs->bdev, &block, fblock);
+	if (rc != 0)
+		return rc;
+
+	/* Initialize pointers to data structures */
+	struct ext4_dir_idx_root *root = (void *)block.data;
+	struct ext4_dir_idx_rinfo *info = &(root->info);
+
+	memset(root, 0, sizeof(struct ext4_dir_idx_root));
+	struct ext4_dir_en *de;
+
+	/* Initialize dot entries */
+	de = (struct ext4_dir_en *)root->dots;
+	ext4_dir_write_entry(sb, de, 12, dir, ".", strlen("."));
+
+	de = (struct ext4_dir_en *)(root->dots + 1);
+	u16int elen = block_size - 12;
+	ext4_dir_write_entry(sb, de, elen, parent, "..", strlen(".."));
+
+	/* Initialize root info structure */
+	u8int hash_version = ext4_get8(&dir->fs->sb, default_hash_version);
+
+	ext4_dir_dx_rinfo_set_hash_version(info, hash_version);
+	ext4_dir_dx_rinfo_set_indirect_levels(info, 0);
+	ext4_dir_dx_root_info_set_info_length(info, 8);
+
+	/* Set limit and current number of entries */
+	struct ext4_dir_idx_climit *climit;
+	climit = (struct ext4_dir_idx_climit *)root->en;
+
+	ext4_dir_dx_climit_set_count(climit, 1);
+
+	u32int entry_space;
+	entry_space = block_size - 2 * sizeof(struct ext4_dir_idx_dot_en) -
+		      sizeof(struct ext4_dir_idx_rinfo);
+
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		entry_space -= sizeof(struct ext4_dir_idx_tail);
+
+	u16int root_limit = entry_space / sizeof(struct ext4_dir_idx_entry);
+	ext4_dir_dx_climit_set_limit(climit, root_limit);
+
+	/* Append new block, where will be new entries inserted in the future */
+	iblock++;
+	if (!need_append)
+		rc = ext4_fs_init_inode_dblk_idx(dir, iblock, &fblock);
+	else
+		rc = ext4_fs_append_inode_dblk(dir, &fblock, &iblock);
+
+	if (rc != 0) {
+		ext4_block_set(dir->fs->bdev, &block);
+		return rc;
+	}
+
+	struct ext4_block new_block;
+	rc = ext4_trans_block_get_noread(dir->fs->bdev, &new_block, fblock);
+	if (rc != 0) {
+		ext4_block_set(dir->fs->bdev, &block);
+		return rc;
+	}
+
+	/* Fill the whole block with empty entry */
+	struct ext4_dir_en *be = (void *)new_block.data;
+
+	/*
+	 * The buffer was obtained without reading, so mark the entry unused
+	 * first: the checksum below must see the final contents.
+	 */
+	ext4_dir_en_set_inode(be, 0);
+
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		u16int len = block_size - sizeof(struct ext4_dir_entry_tail);
+		ext4_dir_en_set_entry_len(be, len);
+		ext4_dir_en_set_name_len(sb, be, 0);
+		ext4_dir_en_set_inode_type(sb, be, EXT4_DE_UNKNOWN);
+		ext4_dir_init_entry_tail(EXT4_DIRENT_TAIL(be, block_size));
+		ext4_dir_set_csum(dir, be);
+	} else {
+		ext4_dir_en_set_entry_len(be, block_size);
+	}
+
+	ext4_trans_set_block_dirty(new_block.buf);
+	rc = ext4_block_set(dir->fs->bdev, &new_block);
+	if (rc != 0) {
+		ext4_block_set(dir->fs->bdev, &block);
+		return rc;
+	}
+
+	/* Connect new block to the only entry in index */
+	struct ext4_dir_idx_entry *entry = root->en;
+	ext4_dir_dx_entry_set_block(entry, iblock);
+
+	ext4_dir_set_dx_csum(dir, (struct ext4_dir_en *)block.data);
+	ext4_trans_set_block_dirty(block.buf);
+
+	return ext4_block_set(dir->fs->bdev, &block);
+}
+
+/**@brief Validate the index root and prepare hash parameters for a lookup.
+ * @param hinfo      Hash info structure to fill in
+ * @param root_block Block holding the index root (logical block 0)
+ * @param sb         Pointer to superblock
+ * @param name_len   Length of name in bytes
+ * @param name       Name to hash, or nil to skip hashing
+ * @return 0 on success, EXT4_ERR_BAD_DX_DIR when the root fails a sanity
+ *         check, otherwise the result of hashing the name
+ */
+static int ext4_dir_hinfo_init(struct ext4_hash_info *hinfo,
+			       struct ext4_block *root_block,
+			       struct ext4_sblock *sb, usize name_len,
+			       const char *name)
+{
+	struct ext4_dir_idx_root *root =
+		(struct ext4_dir_idx_root *)root_block->data;
+	struct ext4_dir_idx_rinfo *info = &root->info;
+	struct ext4_dir_idx_climit *climit = (void *)root->en;
+	u32int expected;
+
+	/* Only the three known hash algorithms are accepted */
+	if (!(info->hash_version == EXT2_HTREE_LEGACY ||
+	      info->hash_version == EXT2_HTREE_HALF_MD4 ||
+	      info->hash_version == EXT2_HTREE_TEA))
+		return EXT4_ERR_BAD_DX_DIR;
+
+	/* Unused flags must be clear and the tree at most one level deep */
+	if (info->unused_flags != 0 || info->indirect_levels > 1)
+		return EXT4_ERR_BAD_DX_DIR;
+
+	/* The stored limit has to match what fits into the root block */
+	expected = ext4_sb_get_block_size(sb);
+	expected -= 2 * sizeof(struct ext4_dir_idx_dot_en);
+	expected -= sizeof(struct ext4_dir_idx_rinfo);
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		expected -= sizeof(struct ext4_dir_idx_tail);
+	expected /= sizeof(struct ext4_dir_idx_entry);
+
+	if (ext4_dir_dx_climit_get_limit(climit) != expected)
+		return EXT4_ERR_BAD_DX_DIR;
+
+	/* Take the hash version from the root, adjusted by the sb flag */
+	hinfo->hash_version = ext4_dir_dx_rinfo_get_hash_version(info);
+	if (hinfo->hash_version <= EXT2_HTREE_TEA &&
+	    ext4_sb_check_flag(sb, EXT4_SUPERBLOCK_FLAGS_UNSIGNED_HASH)) {
+		/* Use unsigned hash */
+		hinfo->hash_version += 3;
+	}
+
+	/* Load hash seed from superblock */
+	hinfo->seed = ext4_get8(sb, hash_seed);
+
+	/* Without a name there is nothing to hash */
+	if (name == nil)
+		return 0;
+
+	return ext4_dir_dx_hash_string(hinfo, name_len, name);
+}
+
+/**@brief Walk through index tree and load leaf with corresponding hash value.
+ * In every node a binary search picks the last entry whose hash does
+ * not exceed hinfo->hash; the visited node and the chosen entry are
+ * recorded in dx_blocks, one element per level.
+ * @param hinfo Initialized hash info structure
+ * @param inode_ref Current i-node
+ * @param root_block Root block (iblock 0), where is root node located
+ * @param dx_block Output: pointer to the last filled element of dx_blocks
+ * @param dx_blocks Array with the whole path from root to leaf
+ * @return 0 on success, EXT4_ERR_BAD_DX_DIR when a node's count or limit
+ * is inconsistent, otherwise the error of the failing block lookup
+ *
+ * NOTE(review): child nodes are read into *root_block (through tmp_blk),
+ * so after a descent the caller's root_block describes the child while
+ * dx_blocks[0].b keeps the copy of the root. On the limit-mismatch path
+ * the child is released here; on the count check of a child it is not.
+ * Confirm that callers release the right blocks on failure.
+ */
+static int ext4_dir_dx_get_leaf(struct ext4_hash_info *hinfo,
+ struct ext4_inode_ref *inode_ref,
+ struct ext4_block *root_block,
+ struct ext4_dir_idx_block **dx_block,
+ struct ext4_dir_idx_block *dx_blocks)
+{
+ struct ext4_dir_idx_root *root;
+ struct ext4_dir_idx_entry *entries;
+ struct ext4_dir_idx_entry *p;
+ struct ext4_dir_idx_entry *q;
+ struct ext4_dir_idx_entry *m;
+ struct ext4_dir_idx_entry *at;
+ ext4_fsblk_t fblk;
+ u32int block_size;
+ u16int limit;
+ u16int entry_space;
+ u8int ind_level;
+ int r;
+
+ struct ext4_dir_idx_block *tmp_dx_blk = dx_blocks;
+ struct ext4_block *tmp_blk = root_block;
+ struct ext4_sblock *sb = &inode_ref->fs->sb;
+
+ block_size = ext4_sb_get_block_size(sb);
+ root = (struct ext4_dir_idx_root *)root_block->data;
+ entries = (struct ext4_dir_idx_entry *)root->en;
+ /* The count/limit header is read from the start of the entry array */
+ limit = ext4_dir_dx_climit_get_limit((void *)entries);
+ ind_level = ext4_dir_dx_rinfo_get_indirect_levels(&root->info);
+
+ /* Walk through the index tree */
+ while (true) {
+ u16int cnt = ext4_dir_dx_climit_get_count((void *)entries);
+ if ((cnt == 0) || (cnt > limit))
+ return EXT4_ERR_BAD_DX_DIR;
+
+ /* Do binary search in every node */
+ /* Slot 0 is not compared: the search runs over entries 1..cnt-1 */
+ p = entries + 1;
+ q = entries + cnt - 1;
+
+ while (p <= q) {
+ m = p + (q - p) / 2;
+ if (ext4_dir_dx_entry_get_hash(m) > hinfo->hash)
+ q = m - 1;
+ else
+ p = m + 1;
+ }
+
+ /* Last entry whose hash is not greater than the wanted one */
+ at = p - 1;
+
+ /* Write results */
+ memcpy(&tmp_dx_blk->b, tmp_blk, sizeof(struct ext4_block));
+ tmp_dx_blk->entries = entries;
+ tmp_dx_blk->position = at;
+
+ /* Is algorithm in the leaf? */
+ if (ind_level == 0) {
+ *dx_block = tmp_dx_blk;
+ return 0;
+ }
+
+ /* Goto child node */
+ u32int n_blk = ext4_dir_dx_entry_get_block(at);
+
+ ind_level--;
+
+ r = ext4_fs_get_inode_dblk_idx(inode_ref, n_blk, &fblk, false);
+ if (r != 0)
+ return r;
+
+ /* The child overwrites the struct tmp_blk points to (root_block) */
+ r = ext4_trans_block_get(inode_ref->fs->bdev, tmp_blk, fblk);
+ if (r != 0)
+ return r;
+
+ entries = ((struct ext4_dir_idx_node *)tmp_blk->data)->entries;
+ limit = ext4_dir_dx_climit_get_limit((void *)entries);
+
+ /* Capacity an interior node must advertise for this block size */
+ entry_space = block_size - sizeof(struct ext4_fake_dir_entry);
+ if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+ entry_space -= sizeof(struct ext4_dir_idx_tail);
+
+ entry_space = entry_space / sizeof(struct ext4_dir_idx_entry);
+
+ if (limit != entry_space) {
+ ext4_block_set(inode_ref->fs->bdev, tmp_blk);
+ return EXT4_ERR_BAD_DX_DIR;
+ }
+
+ /* A checksum mismatch is only logged; the walk continues */
+ if (!ext4_dir_dx_csum_verify(inode_ref, (void *)tmp_blk->data)) {
+ ext4_dbg(DEBUG_DIR_IDX,
+ DBG_WARN "HTree checksum failed."
+ "Inode: %ud, "
+ "Block: %ud\n",
+ inode_ref->index,
+ n_blk);
+ }
+
+ ++tmp_dx_blk;
+ }
+
+ /* Unreachable */
+}
+
+/**@brief Check whether the next leaf block has to be searched as well.
+ * Advances the recorded path to the next index entry, climbing towards
+ * the root when a node is exhausted, and reloads the lower levels of
+ * the path when the search has to continue.
+ * @param inode_ref Directory i-node
+ * @param hash Hash value to check
+ * @param dx_block Current block
+ * @param dx_blocks Array with path from root to leaf node
+ * @return 0 when there is no further block to search,
+ * EXT4_ERR_NOT_FOUND when the path now points at the next
+ * block to search, otherwise the error of a block operation
+ */
+static int ext4_dir_dx_next_block(struct ext4_inode_ref *inode_ref,
+ u32int hash,
+ struct ext4_dir_idx_block *dx_block,
+ struct ext4_dir_idx_block *dx_blocks)
+{
+ int r;
+ /* Number of levels climbed, i.e. path elements to reload below */
+ u32int num_handles = 0;
+ ext4_fsblk_t blk_adr;
+ struct ext4_dir_idx_block *p = dx_block;
+
+ /* Try to find data block with next bunch of entries */
+ while (true) {
+ u16int cnt = ext4_dir_dx_climit_get_count((void *)p->entries);
+
+ p->position++;
+ if (p->position < p->entries + cnt)
+ break;
+
+ /* The root itself is exhausted: nothing left to search */
+ if (p == dx_blocks)
+ return 0;
+
+ num_handles++;
+ p--;
+ }
+
+ /* Check hash collision: when the wanted hash has its low bit clear,
+ * the next block is only used if its hash, ignoring the low bit,
+ * equals the wanted hash */
+ u32int current_hash = ext4_dir_dx_entry_get_hash(p->position);
+ if ((hash & 1) == 0) {
+ if ((current_hash & ~1) != hash)
+ return 0;
+ }
+
+ /* Fill new path */
+ while (num_handles--) {
+ u32int blk = ext4_dir_dx_entry_get_block(p->position);
+ r = ext4_fs_get_inode_dblk_idx(inode_ref, blk, &blk_adr, false);
+ if (r != 0)
+ return r;
+
+ struct ext4_block b;
+ r = ext4_trans_block_get(inode_ref->fs->bdev, &b, blk_adr);
+ if (r != 0)
+ return r;
+
+ /* A checksum mismatch is only logged */
+ if (!ext4_dir_dx_csum_verify(inode_ref, (void *)b.data)) {
+ ext4_dbg(DEBUG_DIR_IDX,
+ DBG_WARN "HTree checksum failed."
+ "Inode: %ud, "
+ "Block: %ud\n",
+ inode_ref->index,
+ blk);
+ }
+
+ p++;
+
+ /* Don't forget to put old block (prevent memory leak) */
+ r = ext4_block_set(inode_ref->fs->bdev, &p->b);
+ if (r != 0)
+ return r;
+
+ /* Replace this path element with the newly loaded node */
+ memcpy(&p->b, &b, sizeof(b));
+ p->entries = ((struct ext4_dir_idx_node *)b.data)->entries;
+ p->position = p->entries;
+ }
+
+ return EXT4_ERR_NOT_FOUND;
+}
+
+/**@brief Look up a name through the HTree index of a directory.
+ *        The index is walked down to the leaf covering the hash of the
+ *        name; that leaf is searched linearly, and following leaves are
+ *        searched too for as long as ext4_dir_dx_next_block() reports that
+ *        the search has to continue. When it reports that no further leaf
+ *        qualifies, the lookup ends with EXT4_ERR_NOT_FOUND.
+ * @param result    On success receives the leaf block (still referenced)
+ *                  and the entry inside it
+ * @param inode_ref Directory i-node
+ * @param name_len  Length of name in bytes
+ * @param name      Name to look for
+ * @return 0 when found, EXT4_ERR_NOT_FOUND when the index holds no such
+ *         name, EXT4_ERR_BAD_DX_DIR when the index cannot be used,
+ *         otherwise the error of a failing block operation
+ */
+int ext4_dir_dx_find_entry(struct ext4_dir_search_result *result,
+			   struct ext4_inode_ref *inode_ref, usize name_len,
+			   const char *name)
+{
+	/* Load direct block 0 (index root) */
+	ext4_fsblk_t root_block_addr;
+	int rc2;
+	int rc;
+	rc = ext4_fs_get_inode_dblk_idx(inode_ref, 0, &root_block_addr, false);
+	if (rc != 0)
+		return rc;
+
+	struct ext4_fs *fs = inode_ref->fs;
+
+	struct ext4_block root_block;
+	rc = ext4_trans_block_get(fs->bdev, &root_block, root_block_addr);
+	if (rc != 0)
+		return rc;
+
+	if (!ext4_dir_dx_csum_verify(inode_ref, (void *)root_block.data)) {
+		ext4_dbg(DEBUG_DIR_IDX,
+			 DBG_WARN "HTree root checksum failed."
+			 "Inode: %ud, "
+			 "Block: %ud\n",
+			 inode_ref->index,
+			 (u32int)0);
+	}
+
+	/* Initialize hash info (compute hash value) */
+	struct ext4_hash_info hinfo;
+	rc = ext4_dir_hinfo_init(&hinfo, &root_block, &fs->sb, name_len, name);
+	if (rc != 0) {
+		ext4_block_set(fs->bdev, &root_block);
+		return EXT4_ERR_BAD_DX_DIR;
+	}
+
+	/*
+	 * Hardcoded number 2 means maximum height of index tree,
+	 * specified in the Linux driver.
+	 */
+	struct ext4_dir_idx_block dx_blocks[2];
+	struct ext4_dir_idx_block *dx_block;
+	struct ext4_dir_idx_block *tmp;
+
+	rc = ext4_dir_dx_get_leaf(&hinfo, inode_ref, &root_block, &dx_block,
+				  dx_blocks);
+	if (rc != 0) {
+		ext4_block_set(fs->bdev, &root_block);
+		return EXT4_ERR_BAD_DX_DIR;
+	}
+
+	for (;;) {
+		/* Load leaf block */
+		u32int leaf_blk_idx;
+		ext4_fsblk_t leaf_block_addr;
+		struct ext4_block b;
+
+		leaf_blk_idx = ext4_dir_dx_entry_get_block(dx_block->position);
+		rc = ext4_fs_get_inode_dblk_idx(inode_ref, leaf_blk_idx,
+						&leaf_block_addr, false);
+		if (rc != 0)
+			break;
+
+		rc = ext4_trans_block_get(fs->bdev, &b, leaf_block_addr);
+		if (rc != 0)
+			break;
+
+		if (!ext4_dir_csum_verify(inode_ref, (void *)b.data)) {
+			ext4_dbg(DEBUG_DIR_IDX,
+				 DBG_WARN "HTree leaf block checksum failed."
+				 "Inode: %ud, "
+				 "Block: %ud\n",
+				 inode_ref->index,
+				 leaf_blk_idx);
+		}
+
+		/* Linear search inside block */
+		struct ext4_dir_en *de;
+		rc = ext4_dir_find_in_block(&b, &fs->sb, name_len, name, &de);
+
+		/* Found => return it */
+		if (rc == 0) {
+			result->block = b;
+			result->dentry = de;
+			break;
+		}
+
+		/* Not found, leave untouched */
+		rc2 = ext4_block_set(fs->bdev, &b);
+		if (rc2 != 0)
+			break;
+
+		if (rc != EXT4_ERR_NOT_FOUND)
+			break;
+
+		/* check if the next block could be checked */
+		rc = ext4_dir_dx_next_block(inode_ref, hinfo.hash, dx_block, &dx_blocks[0]);
+		if (rc == EXT4_ERR_NOT_FOUND)
+			continue;
+
+		/*
+		 * 0 means no further leaf can hold the name. The path position
+		 * has already been advanced, possibly past the valid entries,
+		 * so the loop must not run again: the entry does not exist.
+		 */
+		if (rc == 0)
+			rc = EXT4_ERR_NOT_FOUND;
+		break;
+	}
+
+	/* The whole path must be released (preventing memory leak) */
+	tmp = dx_blocks;
+
+	while (tmp <= dx_block) {
+		rc2 = ext4_block_set(fs->bdev, &tmp->b);
+		if (rc == 0 && rc2 != 0)
+			rc = rc2;
+		++tmp;
+	}
+
+	return rc;
+}
+
+/**@brief qsort() comparison callback ordering sort entries by hash.
+ * @param arg1 First entry (struct ext4_dx_sort_entry)
+ * @param arg2 Second entry (struct ext4_dx_sort_entry)
+ *
+ * @return 0 when the hashes are equal, -1 when arg1 sorts first,
+ *         1 when arg2 sorts first
+ */
+static int ext4_dir_dx_entry_comparator(const void *arg1, const void *arg2)
+{
+	const struct ext4_dx_sort_entry *lhs = arg1;
+	const struct ext4_dx_sort_entry *rhs = arg2;
+
+	if (lhs->hash < rhs->hash)
+		return -1;
+
+	return lhs->hash > rhs->hash ? 1 : 0;
+}
+
+/**@brief Insert a new index entry right after the current position.
+ *        Entries behind the position are shifted up by one slot, the
+ *        entry count is incremented, the node checksum is refreshed and
+ *        the buffer is marked dirty.
+ *        Note that space for new entry must be checked by caller.
+ * @param inode_ref   Directory i-node
+ * @param index_block Index node (with position) to insert into
+ * @param hash        Hash value covered by child node
+ * @param iblock      Logical number of child block
+ *
+ */
+static void
+ext4_dir_dx_insert_entry(struct ext4_inode_ref *inode_ref,
+			 struct ext4_dir_idx_block *index_block,
+			 u32int hash, u32int iblock)
+{
+	struct ext4_dir_idx_climit *climit = (void *)index_block->entries;
+	struct ext4_dir_idx_entry *slot = index_block->position + 1;
+	struct ext4_dir_idx_entry *used_end;
+	u32int count = ext4_dir_dx_climit_get_count(climit);
+	usize tail_bytes;
+
+	/* Everything from the slot up to the last used entry moves up */
+	used_end = index_block->entries + count;
+	tail_bytes = (u8int *)used_end - (u8int *)slot;
+	memmove(slot + 1, slot, tail_bytes);
+
+	ext4_dir_dx_entry_set_block(slot, iblock);
+	ext4_dir_dx_entry_set_hash(slot, hash);
+	ext4_dir_dx_climit_set_count(climit, count + 1);
+
+	ext4_dir_set_dx_csum(inode_ref, (void *)index_block->b.data);
+	ext4_trans_set_block_dirty(index_block->b.buf);
+}
+
+/**@brief Split directory entries to two parts preventing node overflow.
+ *
+ * Every live entry of the old block is hashed and copied to a scratch
+ * buffer, the copies are sorted by hash, the lower part is written back to
+ * the old block and the upper part to a freshly appended block.  The new
+ * block is finally registered in the index under the hash of its first
+ * entry.
+ *
+ * @param inode_ref      Directory i-node
+ * @param hinfo          Hash info
+ * @param old_data_block Block with data to be split
+ * @param index_block    Block where index entries are located
+ * @param new_data_block Output value for newly allocated data block,
+ *                       written only on success
+ * @return 0 on success, -1 with errstr Enomem when a scratch allocation
+ *         fails, otherwise the error of the failing helper
+ */
+static int ext4_dir_dx_split_data(struct ext4_inode_ref *inode_ref,
+	struct ext4_hash_info *hinfo,
+	struct ext4_block *old_data_block,
+	struct ext4_dir_idx_block *index_block,
+	struct ext4_block *new_data_block)
+{
+	int rc;
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
+
+	/* Allocate buffer for directory entries */
+	u8int *entry_buffer = ext4_malloc(block_size);
+	if (entry_buffer == nil) {
+		werrstr(Enomem);
+		return -1;
+	}
+
+	/* dot entry has the smallest size available */
+	u32int max_ecnt = block_size / sizeof(struct ext4_dir_idx_dot_en);
+
+	/* Allocate sort entry */
+	struct ext4_dx_sort_entry *sort;
+
+	sort = ext4_malloc(max_ecnt * sizeof(struct ext4_dx_sort_entry));
+	if (sort == nil) {
+		ext4_free(entry_buffer);
+		werrstr(Enomem);
+		return -1;
+	}
+
+	u32int idx = 0;
+	u32int real_size = 0;
+
+	/* Work on a private copy so the caller's hash stays untouched */
+	struct ext4_hash_info hinfo_tmp;
+	memcpy(&hinfo_tmp, hinfo, sizeof(struct ext4_hash_info));
+
+	/*
+	 * Load all valid entries to the buffer.
+	 * NOTE(review): a zero entry length in a damaged block would keep
+	 * this loop from advancing - confirm that callers validate blocks.
+	 */
+	struct ext4_dir_en *de = (void *)old_data_block->data;
+	u8int *entry_buffer_ptr = entry_buffer;
+	while ((void *)de < (void *)(old_data_block->data + block_size)) {
+		/* Read only valid entries */
+		if (ext4_dir_en_get_inode(de) && de->name_len) {
+			u16int len = ext4_dir_en_get_name_len(sb, de);
+			rc = ext4_dir_dx_hash_string(&hinfo_tmp, len,
+						     (char *)de->name);
+			if (rc != 0) {
+				ext4_free(sort);
+				ext4_free(entry_buffer);
+				return rc;
+			}
+
+			/* 8 byte entry header plus name, rounded up to 4 */
+			u32int rec_len = 8 + len;
+			if ((rec_len % 4) != 0)
+				rec_len += 4 - (rec_len % 4);
+
+			memcpy(entry_buffer_ptr, de, rec_len);
+
+			sort[idx].dentry = entry_buffer_ptr;
+			sort[idx].rec_len = rec_len;
+			sort[idx].hash = hinfo_tmp.hash;
+
+			entry_buffer_ptr += rec_len;
+			real_size += rec_len;
+			idx++;
+		}
+
+		usize elen = ext4_dir_en_get_entry_len(de);
+		de = (void *)((u8int *)de + elen);
+	}
+
+	qsort(sort, idx, sizeof(struct ext4_dx_sort_entry),
+	      ext4_dir_dx_entry_comparator);
+
+	/* Allocate new block for store the second part of entries */
+	ext4_fsblk_t new_fblock;
+	u32int new_iblock;
+	rc = ext4_fs_append_inode_dblk(inode_ref, &new_fblock, &new_iblock);
+	if (rc != 0) {
+		ext4_free(sort);
+		ext4_free(entry_buffer);
+		return rc;
+	}
+
+	/* Load new block */
+	struct ext4_block new_data_block_tmp;
+	rc = ext4_trans_block_get_noread(inode_ref->fs->bdev, &new_data_block_tmp,
+					 new_fblock);
+	if (rc != 0) {
+		ext4_free(sort);
+		ext4_free(entry_buffer);
+		return rc;
+	}
+
+	/*
+	 * Distribute entries to two blocks (by size)
+	 * - compute the half
+	 */
+	u32int new_hash = 0;
+	u32int current_size = 0;
+	u32int mid = 0;
+	u32int i;
+	for (i = 0; i < idx; ++i) {
+		if ((current_size + sort[i].rec_len) > (block_size / 2)) {
+			new_hash = sort[i].hash;
+			mid = i;
+			break;
+		}
+
+		current_size += sort[i].rec_len;
+	}
+
+	/*
+	 * If the live entries never add up to more than half a block the
+	 * loop above runs to completion without choosing a split point.
+	 * Split by entry count in that case, so that both blocks receive
+	 * entries and the index gets a real hash.
+	 */
+	if (i == idx && idx > 1) {
+		mid = idx / 2;
+		new_hash = sort[mid].hash;
+	}
+
+	/*
+	 * Check hash collision.  With mid == 0 there is no entry to the
+	 * left of the split point to compare with.
+	 */
+	u32int continued = 0;
+	if (mid > 0 && new_hash == sort[mid - 1].hash)
+		continued = 1;
+
+	u32int off = 0;
+	void *ptr;
+	/* Keep the last entry clear of the checksum tail */
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		block_size -= sizeof(struct ext4_dir_entry_tail);
+
+	/* First part - to the old block */
+	for (i = 0; i < mid; ++i) {
+		ptr = old_data_block->data + off;
+		memcpy(ptr, sort[i].dentry, sort[i].rec_len);
+
+		/* The last entry owns the rest of the block */
+		struct ext4_dir_en *t = ptr;
+		if (i < (mid - 1))
+			ext4_dir_en_set_entry_len(t, sort[i].rec_len);
+		else
+			ext4_dir_en_set_entry_len(t, block_size - off);
+
+		off += sort[i].rec_len;
+	}
+
+	/* Second part - to the new block */
+	off = 0;
+	for (i = mid; i < idx; ++i) {
+		ptr = new_data_block_tmp.data + off;
+		memcpy(ptr, sort[i].dentry, sort[i].rec_len);
+
+		struct ext4_dir_en *t = ptr;
+		if (i < (idx - 1))
+			ext4_dir_en_set_entry_len(t, sort[i].rec_len);
+		else
+			ext4_dir_en_set_entry_len(t, block_size - off);
+
+		off += sort[i].rec_len;
+	}
+
+	/* Back to the full block size for locating the tails */
+	block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
+
+	/* Do some steps to finish operation */
+	sb = &inode_ref->fs->sb;
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		struct ext4_dir_entry_tail *t;
+
+		t = EXT4_DIRENT_TAIL(old_data_block->data, block_size);
+		ext4_dir_init_entry_tail(t);
+		t = EXT4_DIRENT_TAIL(new_data_block_tmp.data, block_size);
+		ext4_dir_init_entry_tail(t);
+	}
+	ext4_dir_set_csum(inode_ref, (void *)old_data_block->data);
+	ext4_dir_set_csum(inode_ref, (void *)new_data_block_tmp.data);
+	ext4_trans_set_block_dirty(old_data_block->buf);
+	ext4_trans_set_block_dirty(new_data_block_tmp.buf);
+
+	ext4_free(sort);
+	ext4_free(entry_buffer);
+
+	ext4_dir_dx_insert_entry(inode_ref, index_block, new_hash + continued,
+				 new_iblock);
+
+	*new_data_block = new_data_block_tmp;
+	return 0;
+}
+
+/**@brief Split index node and maybe some parent nodes in the tree hierarchy.
+ *
+ * Nothing happens unless the node addressed by dxb is full.  A full
+ * second-level node is split in two and the right half is registered in
+ * the root.  A full root has its entries moved to a new child node, which
+ * makes the tree one level deeper.
+ *
+ * @param ino_ref      Directory i-node
+ * @param dx_blks      Array with path from root to leaf node
+ * @param dxb          Leaf block to be split if needed
+ * @param new_dx_block Receives the new leaf-level index block when the
+ *                     root was split; left untouched otherwise
+ * @return 0 on success or when no split was needed, -1 with errstr Enospc
+ *         when both the node and the root are full, otherwise the error of
+ *         the failing helper
+ */
+static int
+ext4_dir_dx_split_index(struct ext4_inode_ref *ino_ref,
+	struct ext4_dir_idx_block *dx_blks,
+	struct ext4_dir_idx_block *dxb,
+	struct ext4_dir_idx_block **new_dx_block)
+{
+	struct ext4_sblock *sb = &ino_ref->fs->sb;
+	struct ext4_dir_idx_entry *e;
+	int r;
+
+	bool meta_csum = ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM);
+
+	u32int block_size = ext4_sb_get_block_size(&ino_ref->fs->sb);
+	u32int entry_space = block_size - sizeof(struct ext4_fake_dir_entry);
+
+	/*
+	 * With metadata checksums the space of a struct ext4_dir_idx_tail
+	 * is not available for index entries, so it has to be taken off
+	 * before the entry limit of a new node is derived.
+	 */
+	if (meta_csum)
+		entry_space -= sizeof(struct ext4_dir_idx_tail);
+
+	u32int node_limit = entry_space / sizeof(struct ext4_dir_idx_entry);
+
+	if (dxb == dx_blks)
+		e = ((struct ext4_dir_idx_root *)dxb->b.data)->en;
+	else
+		e = ((struct ext4_dir_idx_node *)dxb->b.data)->entries;
+
+	/* The count/limit pair overlays the first index entry */
+	struct ext4_dir_idx_climit *climit = (struct ext4_dir_idx_climit *)e;
+
+	u16int leaf_limit = ext4_dir_dx_climit_get_limit(climit);
+	u16int leaf_count = ext4_dir_dx_climit_get_count(climit);
+
+	/* Check if is necessary to split index block */
+	if (leaf_limit != leaf_count)
+		return 0;
+
+	struct ext4_dir_idx_entry *ren;
+	intptr levels = dxb - dx_blks;
+
+	ren = ((struct ext4_dir_idx_root *)dx_blks[0].b.data)->en;
+	struct ext4_dir_idx_climit *rclimit = (void *)ren;
+	u16int root_limit = ext4_dir_dx_climit_get_limit(rclimit);
+	u16int root_count = ext4_dir_dx_climit_get_count(rclimit);
+
+	/* Linux limitation */
+	if ((levels > 0) && (root_limit == root_count)) {
+		werrstr(Enospc);
+		return -1;
+	}
+
+	/* Add new block to directory */
+	ext4_fsblk_t new_fblk;
+	u32int new_iblk;
+	r = ext4_fs_append_inode_dblk(ino_ref, &new_fblk, &new_iblk);
+	if (r != 0)
+		return r;
+
+	/* load new block */
+	struct ext4_block b;
+	r = ext4_trans_block_get_noread(ino_ref->fs->bdev, &b, new_fblk);
+	if (r != 0)
+		return r;
+
+	struct ext4_dir_idx_node *new_node = (void *)b.data;
+	struct ext4_dir_idx_entry *new_en = new_node->entries;
+
+	memset(&new_node->fake, 0, sizeof(struct ext4_fake_dir_entry));
+	new_node->fake.entry_length = block_size;
+
+	/* Split leaf node */
+	if (levels > 0) {
+		u32int count_left = leaf_count / 2;
+		u32int count_right = leaf_count - count_left;
+		u32int hash_right;
+		usize sz;
+
+		struct ext4_dir_idx_climit *left_climit;
+		struct ext4_dir_idx_climit *right_climit;
+
+		hash_right = ext4_dir_dx_entry_get_hash(e + count_left);
+		/* Copy data to new node */
+		sz = count_right * sizeof(struct ext4_dir_idx_entry);
+		memcpy(new_en, e + count_left, sz);
+
+		/* Initialize new node */
+		left_climit = (struct ext4_dir_idx_climit *)e;
+		right_climit = (struct ext4_dir_idx_climit *)new_en;
+
+		ext4_dir_dx_climit_set_count(left_climit, count_left);
+		ext4_dir_dx_climit_set_count(right_climit, count_right);
+
+		ext4_dir_dx_climit_set_limit(right_climit, node_limit);
+
+		/* Which index block is target for new entry */
+		u32int position_index =
+		    (dxb->position - dxb->entries);
+		if (position_index >= count_left) {
+			/*
+			 * The position moved to the right half: make the
+			 * new node the current path element and keep the
+			 * old one in b so that it is released below.
+			 */
+			ext4_dir_set_dx_csum(
+					ino_ref,
+					(struct ext4_dir_en *)
+					dxb->b.data);
+			ext4_trans_set_block_dirty(dxb->b.buf);
+
+			struct ext4_block block_tmp = dxb->b;
+
+			dxb->b = b;
+
+			dxb->position =
+			    new_en + position_index - count_left;
+			dxb->entries = new_en;
+
+			b = block_tmp;
+		}
+
+		/* Finally insert new entry */
+		ext4_dir_dx_insert_entry(ino_ref, dx_blks, hash_right,
+					 new_iblk);
+		ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[0].b.data);
+		ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[1].b.data);
+		ext4_trans_set_block_dirty(dx_blks[0].b.buf);
+		ext4_trans_set_block_dirty(dx_blks[1].b.buf);
+
+		/* b is the half that is no longer part of the path */
+		ext4_dir_set_dx_csum(ino_ref, (void *)b.data);
+		ext4_trans_set_block_dirty(b.buf);
+		return ext4_block_set(ino_ref->fs->bdev, &b);
+	}
+
+	/* Copy data from root to child block */
+	usize sz = leaf_count * sizeof(struct ext4_dir_idx_entry);
+	memcpy(new_en, e, sz);
+
+	/* The count came along with the copy; only the limit differs */
+	struct ext4_dir_idx_climit *new_climit = (void*)new_en;
+	ext4_dir_dx_climit_set_limit(new_climit, node_limit);
+
+	/* Set values in root node */
+	struct ext4_dir_idx_climit *new_root_climit = (void *)e;
+
+	ext4_dir_dx_climit_set_count(new_root_climit, 1);
+	ext4_dir_dx_entry_set_block(e, new_iblk);
+
+	struct ext4_dir_idx_root *root = (void *)dx_blks[0].b.data;
+	root->info.indirect_levels = 1;
+
+	/* Add new entry to the path */
+	dxb = dx_blks + 1;
+	dxb->position = dx_blks->position - e + new_en;
+	dxb->entries = new_en;
+	dxb->b = b;
+	*new_dx_block = dxb;
+
+	ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[0].b.data);
+	ext4_dir_set_dx_csum(ino_ref, (void*)dx_blks[1].b.data);
+	ext4_trans_set_block_dirty(dx_blks[0].b.buf);
+	ext4_trans_set_block_dirty(dx_blks[1].b.buf);
+
+	return 0;
+}
+
+/**@brief Add an entry to an indexed directory.
+ *
+ * The leaf block responsible for the hash of the name is looked up through
+ * the index.  If the entry does not fit there, the leaf is split and the
+ * entry goes to whichever half covers its hash.
+ *
+ * @param parent   Directory i-node
+ * @param child    I-node the new entry refers to
+ * @param name     Name of the new entry
+ * @param name_len Length of the name
+ * @return 0 on success, EXT4_ERR_BAD_DX_DIR when the hash info cannot be
+ *         initialised or the leaf cannot be located, otherwise the error
+ *         of the failing helper.  An error while releasing blocks takes
+ *         precedence over the result of the insertion.
+ */
+int ext4_dir_dx_add_entry(struct ext4_inode_ref *parent,
+	struct ext4_inode_ref *child, const char *name, u32int name_len)
+{
+	int rc2 = 0;
+	int rel = 0;
+	int r;
+	/* Get direct block 0 (index root) */
+	ext4_fsblk_t rblock_addr;
+	r = ext4_fs_get_inode_dblk_idx(parent, 0, &rblock_addr, false);
+	if (r != 0)
+		return r;
+
+	struct ext4_fs *fs = parent->fs;
+	struct ext4_block root_blk;
+
+	r = ext4_trans_block_get(fs->bdev, &root_blk, rblock_addr);
+	if (r != 0)
+		return r;
+
+	/* A bad checksum is only reported, the block is used anyway */
+	if (!ext4_dir_dx_csum_verify(parent, (void*)root_blk.data)) {
+		ext4_dbg(DEBUG_DIR_IDX,
+			 DBG_WARN "HTree root checksum failed."
+			 "Inode: %ud, "
+			 "Block: %ud\n",
+			 parent->index,
+			 (u32int)0);
+	}
+
+	/* Initialize hinfo structure (mainly compute hash) */
+	struct ext4_hash_info hinfo;
+	r = ext4_dir_hinfo_init(&hinfo, &root_blk, &fs->sb, name_len, name);
+	if (r != 0) {
+		ext4_block_set(fs->bdev, &root_blk);
+		return EXT4_ERR_BAD_DX_DIR;
+	}
+
+	/*
+	 * Hardcoded number 2 means maximum height of index
+	 * tree defined in Linux.
+	 */
+	struct ext4_dir_idx_block dx_blks[2];
+	struct ext4_dir_idx_block *dx_blk;
+	struct ext4_dir_idx_block *dx_it;
+
+	r = ext4_dir_dx_get_leaf(&hinfo, parent, &root_blk, &dx_blk, dx_blks);
+	if (r != 0) {
+		/*
+		 * NOTE(review): the release loop below relies on dx_blk
+		 * having been stored by ext4_dir_dx_get_leaf() even when it
+		 * fails - confirm against that function.
+		 */
+		r = EXT4_ERR_BAD_DX_DIR;
+		goto release_index;
+	}
+
+	/* Try to insert to existing data block */
+	u32int leaf_block_idx = ext4_dir_dx_entry_get_block(dx_blk->position);
+	ext4_fsblk_t leaf_block_addr;
+	r = ext4_fs_get_inode_dblk_idx(parent, leaf_block_idx,
+				       &leaf_block_addr, false);
+	if (r != 0)
+		goto release_index;
+
+	/*
+	 * Check if there is needed to split index node
+	 * (and recursively also parent nodes).
+	 * The target block is not loaded yet, so on failure only the
+	 * index blocks may be released.
+	 */
+	r = ext4_dir_dx_split_index(parent, dx_blks, dx_blk, &dx_blk);
+	if (r != 0)
+		goto release_index;
+
+	struct ext4_block target_block;
+	r = ext4_trans_block_get(fs->bdev, &target_block, leaf_block_addr);
+	if (r != 0)
+		goto release_index;
+
+	if (!ext4_dir_csum_verify(parent,(void *)target_block.data)) {
+		ext4_dbg(DEBUG_DIR_IDX,
+			 DBG_WARN "HTree leaf block checksum failed."
+			 "Inode: %ud, "
+			 "Block: %ud\n",
+			 parent->index,
+			 leaf_block_idx);
+	}
+
+	/* Check if insert operation passed */
+	r = ext4_dir_try_insert_entry(&fs->sb, parent, &target_block, child,
+				      name, name_len);
+	if (r == 0)
+		goto release_target_index;
+
+	/* Split entries to two blocks (includes sorting by hash value) */
+	struct ext4_block new_block;
+	r = ext4_dir_dx_split_data(parent, &hinfo, &target_block, dx_blk,
+				   &new_block);
+	if (r != 0)
+		goto release_target_index;
+
+	/*
+	 * Where to save new entry: the index entry right after the
+	 * current position is the one the split has just inserted.
+	 */
+	u32int blk_hash = ext4_dir_dx_entry_get_hash(dx_blk->position + 1);
+	if (hinfo.hash >= blk_hash)
+		r = ext4_dir_try_insert_entry(&fs->sb, parent, &new_block,
+					      child, name, name_len);
+	else
+		r = ext4_dir_try_insert_entry(&fs->sb, parent, &target_block,
+					      child, name, name_len);
+
+	/*
+	 * The block obtained by the split is released whether or not the
+	 * insert worked; an insert error is not overwritten by the result
+	 * of the release.
+	 */
+	rc2 = ext4_block_set(fs->bdev, &new_block);
+	if (r == 0)
+		r = rc2;
+
+/* Cleanup operations */
+
+release_target_index:
+	rc2 = r;
+
+	r = ext4_block_set(fs->bdev, &target_block);
+
+release_index:
+	if (r != 0)
+		rc2 = r;
+
+	/* Release every index block, remembering the first failure */
+	for (dx_it = dx_blks; dx_it <= dx_blk; dx_it++) {
+		r = ext4_block_set(fs->bdev, &dx_it->b);
+		if (r != 0 && rel == 0)
+			rel = r;
+	}
+
+	return rel != 0 ? rel : rc2;
+}
+
+/**@brief Point the ".." entry of an indexed directory at a new parent.
+ * @param dir          Directory i-node whose index root is rewritten
+ * @param parent_inode I-node number of the new parent
+ * @return 0 on success, otherwise the error of the failing helper
+ */
+int ext4_dir_dx_reset_parent_inode(struct ext4_inode_ref *dir,
+	u32int parent_inode)
+{
+	ext4_fsblk_t root_addr;
+	struct ext4_block root_blk;
+	struct ext4_dir_idx_root *root;
+	int r;
+
+	/* The index root lives in logical block 0 of the directory */
+	r = ext4_fs_get_inode_dblk_idx(dir, 0, &root_addr, false);
+	if (r != 0)
+		return r;
+
+	r = ext4_trans_block_get(dir->fs->bdev, &root_blk, root_addr);
+	if (r != 0)
+		return r;
+
+	/* A checksum mismatch is only logged */
+	if (!ext4_dir_dx_csum_verify(dir, (void *)root_blk.data)) {
+		ext4_dbg(DEBUG_DIR_IDX,
+			 DBG_WARN "HTree root checksum failed."
+			 "Inode: %ud, "
+			 "Block: %ud\n",
+			 dir->index,
+			 (u32int)0);
+	}
+
+	/* dots[1] is the second of the two dot entries in the root */
+	root = (void *)root_blk.data;
+	ext4_dx_dot_en_set_inode(&root->dots[1], parent_inode);
+
+	/* Refresh the checksum before handing the block back */
+	ext4_dir_set_dx_csum(dir, (void *)root_blk.data);
+	ext4_trans_set_block_dirty(root_blk.buf);
+
+	return ext4_block_set(dir->fs->bdev, &root_blk);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_extent.c
@@ -1,0 +1,2218 @@
+#include "ext4_config.h"
+#include "ext4_debug.h"
+#include "ext4_fs.h"
+#include "ext4_trans.h"
+#include "ext4_blockdev.h"
+#include "ext4_extent.h"
+#include "ext4_inode.h"
+#include "ext4_super.h"
+#include "ext4_crc32.h"
+#include "ext4_balloc.h"
+
+//#define CONFIG_EXTENT_DEBUG_VERBOSE
+
+/**@brief Return the extent tree depth
+ * @param inode_ref I-node reference the tree belongs to
+ * @return Depth field of the extent header obtained from the i-node */
+static inline u16int
+ext4_extent_tree_depth(struct ext4_inode_ref *inode_ref)
+{
+	return ext4_extent_header_get_depth(
+	    ext4_inode_get_extent_header(inode_ref->inode));
+}
+
+/* Return the checksum tail of an extent node, which is located
+ * EXT4_EXTENT_TAIL_OFFSET(eh) bytes after the start of its header. */
+static struct ext4_extent_tail *
+ext4_extent_get_csum_tail(struct ext4_extent_header *eh)
+{
+	char *base = (char *)eh;
+
+	return (struct ext4_extent_tail *)(base + EXT4_EXTENT_TAIL_OFFSET(eh));
+}
+
+/* Compute the crc32c checksum of an extent node block.
+ * The checksum chains the file system uuid, the little-endian i-node
+ * number, the little-endian i-node generation and the node itself up to
+ * the checksum tail.  Returns 0 when the file system does not use
+ * metadata checksums. */
+static u32int ext4_extent_block_csum(struct ext4_inode_ref *inode_ref,
+	struct ext4_extent_header *eh)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	u32int ino_le, gen_le, crc;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return 0;
+
+	ino_le = to_le32(inode_ref->index);
+	gen_le = to_le32(ext4_inode_get_generation(inode_ref->inode));
+
+	crc = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	crc = ext4_crc32c(crc, &ino_le, sizeof(ino_le));
+	crc = ext4_crc32c(crc, &gen_le, sizeof(gen_le));
+
+	return ext4_crc32c(crc, eh, EXT4_EXTENT_TAIL_OFFSET(eh));
+}
+
+/* Check the stored checksum of an extent node block.
+ * Returns true when metadata checksums are not in use, when the depth in
+ * the block's header is not below the depth of the tree, or when the
+ * stored checksum matches the computed one. */
+static bool
+ext4_extent_verify_block_csum(struct ext4_inode_ref *inode_ref,
+	struct ext4_block *block)
+{
+	struct ext4_extent_header *eh;
+
+	if (!ext4_sb_feature_ro_com(&inode_ref->fs->sb,
+				    EXT4_FRO_COM_METADATA_CSUM))
+		return true;
+
+	eh = (struct ext4_extent_header *)block->data;
+	if (ext4_extent_header_get_depth(eh) >=
+	    ext4_extent_tree_depth(inode_ref))
+		return true;
+
+	return ext4_extent_get_csum_tail(eh)->checksum ==
+	    to_le32(ext4_extent_block_csum(inode_ref, eh));
+}
+
+/* Store a fresh checksum in the tail of an extent node.
+ * Nothing is written when metadata checksums are not in use or when the
+ * depth in the header is not below the depth of the tree. */
+static void
+ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref,
+	struct ext4_extent_header *eh)
+{
+	if (!ext4_sb_feature_ro_com(&inode_ref->fs->sb,
+				    EXT4_FRO_COM_METADATA_CSUM))
+		return;
+
+	if (ext4_extent_header_get_depth(eh) >=
+	    ext4_extent_tree_depth(inode_ref))
+		return;
+
+	ext4_extent_get_csum_tail(eh)->checksum =
+	    to_le32(ext4_extent_block_csum(inode_ref, eh));
+}
+
+#ifdef CONFIG_EXTENT_DEBUG_VERBOSE
+/* Debugging aid: print every element of an extent path, from
+ * path[rootdepth] down to path[0], together with all entries of the node
+ * it refers to.  Compiles to nothing unless CONFIG_EXTENT_DEBUG_VERBOSE
+ * is defined. */
+static void
+ext4_extent_print_path(struct ext4_inode_ref *inode_ref,
+	struct ext4_extent_path *path)
+{
+	struct ext4_extent_path *p;
+
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "Path address: %p\n", path);
+	for (p = path + ext4_extent_tree_depth(inode_ref); p >= path; p--) {
+		u16int nent = ext4_extent_header_get_nentries(p->header);
+		u16int limit = ext4_extent_header_get_max_nentries(p->header);
+		u16int i;
+
+		ext4_dbg(DEBUG_EXTENT,
+DBG_INFO "-- Block: %llud, Depth: %uhd, Entries: %uhd, Limit: %uhd\n",
+			 p->block.lb_id, p->depth, nent, limit);
+		for (i = 0; i < nent; i++) {
+			if (!p->depth) {
+				/* Leaf level: the slots hold extents */
+				struct ext4_extent *ex;
+
+				ex = EXT4_EXTENT_FIRST(p->header) + i;
+				ext4_dbg(DEBUG_EXTENT,
+DBG_INFO "Extent: iblock: %ud, fsblock: %llud, count: %uhd\n",
+					 ext4_extent_get_iblock(ex),
+					 ext4_extent_get_fblock(ex),
+					 ext4_extent_get_nblocks(ex));
+				continue;
+			}
+
+			/* Inner level: the slots hold indexes */
+			struct ext4_extent_index *ix;
+
+			ix = EXT4_EXTENT_FIRST_INDEX(p->header) + i;
+			ext4_dbg(DEBUG_EXTENT,
+DBG_INFO "Index: iblock: %ud, fsblock: %llud\n",
+				 ext4_extent_index_get_iblock(ix),
+				 ext4_extent_index_get_fblock(ix));
+		}
+	}
+
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "====================\n");
+}
+#else /* CONFIG_EXTENT_DEBUG_VERBOSE */
+#define ext4_extent_print_path(...)
+#endif /* CONFIG_EXTENT_DEBUG_VERBOSE */
+
+/**@brief Binary search in extent index node.
+ * Selects the last index whose starting logical block is not greater than
+ * iblock; the first index is selected when there is no such index.
+ * @param header Extent header of index node
+ * @param index Output value - found index will be set here
+ * @param iblock Logical block number to find in index node */
+static void ext4_extent_binsearch_idx(struct ext4_extent_header *header,
+	struct ext4_extent_index **index,
+	ext4_lblk_t iblock)
+{
+	struct ext4_extent_index *first = EXT4_EXTENT_FIRST_INDEX(header);
+	int lo, hi;
+
+	/* Slot 0 is the fallback answer, so the search starts at slot 1 */
+	lo = 1;
+	hi = (int)ext4_extent_header_get_nentries(header) - 1;
+
+	while (lo <= hi) {
+		int mid = lo + (hi - lo) / 2;
+		ext4_lblk_t start = ext4_extent_index_get_iblock(first + mid);
+
+		if (iblock < start)
+			hi = mid - 1;
+		else
+			lo = mid + 1;
+	}
+
+	/* lo is now the first slot starting beyond iblock */
+	*index = first + (lo - 1);
+}
+
+/**@brief Binary search in extent leaf node.
+ * Selects the last extent whose starting logical block is not greater than
+ * iblock; the first extent is selected when there is no such extent.
+ * @param header Extent header of leaf node
+ * @param extent Output value - found extent will be set here,
+ *               or nil if node is empty
+ * @param iblock Logical block number to find in leaf node */
+static void ext4_extent_binsearch(struct ext4_extent_header *header,
+	struct ext4_extent **extent,
+	ext4_lblk_t iblock)
+{
+	struct ext4_extent *first;
+	u16int nentries = ext4_extent_header_get_nentries(header);
+	int lo, hi;
+
+	/* this leaf is empty */
+	if (nentries == 0) {
+		*extent = nil;
+		return;
+	}
+
+	first = EXT4_EXTENT_FIRST(header);
+
+	/* Slot 0 is the fallback answer, so the search starts at slot 1 */
+	lo = 1;
+	hi = (int)nentries - 1;
+
+	while (lo <= hi) {
+		int mid = lo + (hi - lo) / 2;
+		ext4_lblk_t start = ext4_extent_get_iblock(first + mid);
+
+		if (iblock < start)
+			hi = mid - 1;
+		else
+			lo = mid + 1;
+	}
+
+	/* lo is now the first slot starting beyond iblock */
+	*extent = first + (lo - 1);
+}
+
+/* Mark the node at the given depth of a path as modified.
+ * At the depth of the tree root the i-node reference is flagged dirty.
+ * At any other depth the node's checksum is refreshed and its block
+ * buffer is marked dirty. */
+static void
+ext4_extent_path_dirty(struct ext4_inode_ref *inode_ref,
+	struct ext4_extent_path *path,
+	u16int depth)
+{
+	if (ext4_extent_tree_depth(inode_ref) == depth) {
+		inode_ref->dirty = true;
+		return;
+	}
+
+	ext4_extent_block_csum_set(inode_ref, path[depth].header);
+	ext4_trans_set_block_dirty(path[depth].block.buf);
+}
+
+/* Release the block buffers held by a path.
+ * Only the levels below the root (path[0] .. path[rootdepth - 1]) are
+ * visited, and only those whose block has a non-zero lb_id.  A failing
+ * release does not stop the walk, so the remaining blocks are not left
+ * referenced.
+ * Returns 0 on success, otherwise the error of the first release that
+ * failed. */
+static int
+ext4_extent_path_release(struct ext4_inode_ref *inode_ref,
+	struct ext4_extent_path *path)
+{
+	int ret = 0;
+	u16int i, rootdepth;
+
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+
+	for (i = 0; i < rootdepth; i++) {
+		int r;
+
+		if (!path[i].block.lb_id)
+			continue;
+
+		r = ext4_block_set(inode_ref->fs->bdev, &path[i].block);
+		if (r != 0 && ret == 0)
+			ret = r;
+	}
+
+	return ret;
+}
+
+/**@brief Physical block allocation hint for extent tree manipulation
+ *        routines
+ * @param inode_ref I-node
+ * @return Physical block allocation hint: the first block of the block
+ *         group the i-node belongs to */
+static ext4_fsblk_t
+ext4_extent_tree_alloc_goal(struct ext4_inode_ref *inode_ref)
+{
+	u32int bgid;
+	struct ext4_sblock *sb;
+
+	sb = &inode_ref->fs->sb;
+
+	/* I-node numbers start at 1, so i-node n belongs to group
+	 * (n - 1) / inodes_per_group.  Without the adjustment the last
+	 * i-node of every group would be steered to the following group. */
+	bgid = (inode_ref->index - 1) / ext4_get32(sb, inodes_per_group);
+
+	/* Currently for allocations from extent tree manipulation routines,
+	 * we try the blocks in the block group the inode table block refers
+	 * to */
+	return ext4_fs_first_bg_block_no(sb, bgid);
+}
+
+/**@brief Physical block allocation hint for data blocks routines
+ * @param inode_ref I-node
+ * @param path path in the extent tree
+ * @param iblock the starting logical block of the
+ *               mapping to be inserted
+ * @return Physical block allocation hint */
+static ext4_fsblk_t
+ext4_extent_data_alloc_goal(struct ext4_inode_ref *inode_ref,
+	struct ext4_extent_path *path,
+	ext4_lblk_t iblock)
+{
+	struct ext4_extent *ext = path[0].extent;
+	ext4_lblk_t first;
+	ext4_fsblk_t phys;
+
+	/* No mapping yet: fall back to the hint used for tree blocks */
+	if (ext == nil)
+		return ext4_extent_tree_alloc_goal(inode_ref) + iblock;
+
+	first = ext4_extent_get_iblock(ext);
+	phys = ext4_extent_get_fblock(ext);
+
+	/* We want the whole file to be continuous: keep the same distance
+	 * from the extent on disk as in the file */
+	if (first < iblock)
+		return phys + iblock - first;
+
+	/* Stepping back further than the start of the extent on disk
+	 * would wrap around, so settle for the extent start */
+	if (first - iblock > phys)
+		return phys;
+
+	return phys - (first - iblock);
+}
+
+/**@brief Verify the extent node block is valid
+ * The checks are, in order: header magic, depth, entry count, maximum
+ * entry count and checksum.  The first check that fails is logged.
+ * @param inode_ref I-node
+ * @param block block buffer of the extent node block
+ * @param depth the depth of extent node wanted
+ * @return true if the block passes verification, otherwise false
+ */
+static bool ext4_extent_block_verify(struct ext4_inode_ref *inode_ref,
+	struct ext4_block *block,
+	u16int depth)
+{
+	struct ext4_extent_header *eh = (struct ext4_extent_header *)block->data;
+	u32int blocksz;
+	u16int expected;
+
+	if (ext4_extent_header_get_magic(eh) != EXT4_EXTENT_MAGIC) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block header mismatch! Block number: %llud\n",
+			 block->lb_id);
+		return false;
+	}
+
+	/* The header has to describe the level the caller expects */
+	if (ext4_extent_header_get_depth(eh) != depth) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block depth mismatch! Expected: %uhd, Got: %uhd. Block number: %llud\n",
+			 depth, ext4_extent_header_get_depth(eh),
+			 block->lb_id);
+		return false;
+	}
+
+	/* A node stored in a block of its own is never empty */
+	if (ext4_extent_header_get_nentries(eh) == 0) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block does not contain any entries! Block number: %llud\n",
+			 block->lb_id);
+		return false;
+	}
+
+	/* The capacity recorded in the header has to match the number of
+	 * entries that fit in a block after the header */
+	blocksz = ext4_sb_get_block_size(&inode_ref->fs->sb);
+	expected = (blocksz - sizeof(struct ext4_extent_header)) /
+	    sizeof(struct ext4_extent);
+
+	if (ext4_extent_header_get_max_nentries(eh) != expected) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Incorrect extent node block maximum entries field! Expected: %uhd, Got: %uhd. Block number: %llud\n",
+			 expected,
+			 ext4_extent_header_get_max_nentries(eh),
+			 block->lb_id);
+		return false;
+	}
+
+	if (!ext4_extent_verify_block_csum(inode_ref, block)) {
+		ext4_dbg(DEBUG_EXTENT,
+DBG_ERROR "Extent node block checksum failed! Block number: %llud\n",
+			 block->lb_id);
+		return false;
+	}
+
+	return true;
+}
+
+/**@brief Find extent for specified iblock.
+ * This function is used for finding block in the extent tree with
+ * saving the path through the tree for possible future modifications.
+ * The path is indexed by depth: element 0 describes the leaf and the
+ * element at the depth of the tree describes the root stored in the
+ * i-node.
+ * @param inode_ref I-node to read extent tree from
+ * @param iblock Iblock to find extent for
+ * @param ppath Output value - loaded path from extent tree, allocated with
+ *              ext4_malloc(); written only on success
+ * @return 0 on success, -1 with errstr Enomem when the path cannot be
+ *         allocated, -1 with errstr Eio when a node fails verification,
+ *         or the error of ext4_trans_block_get() */
+static int ext4_extent_find_extent(struct ext4_inode_ref *inode_ref,
+	ext4_lblk_t iblock,
+	struct ext4_extent_path **ppath)
+{
+	struct ext4_extent_header *eh;
+	int ret;
+	u16int depth;
+	u16int k;
+	struct ext4_extent_path *tpath;
+
+	depth = ext4_extent_tree_depth(inode_ref);
+	eh = ext4_inode_get_extent_header(inode_ref->inode);
+
+	/* Added 2 for possible tree growing (1 extra depth) */
+	tpath = ext4_malloc(sizeof(struct ext4_extent_path) * (depth + 2));
+	if (tpath == nil) {
+		werrstr(Enomem);
+		return -1;
+	}
+
+	/* Zero the path array because we need to make sure that
+	 * lb_id field of block buffer is zero */
+	memset(tpath, 0, sizeof(struct ext4_extent_path) * (depth + 2));
+
+	/* Initialize structure for algorithm start */
+	k = depth;
+	tpath[k].block = inode_ref->block;
+	tpath[k].header = eh;
+
+	/* Walk through the extent tree */
+	while ((depth = ext4_extent_header_get_depth(eh)) != 0) {
+		/* Search index in index node by iblock */
+		ext4_extent_binsearch_idx(tpath[k].header,
+					  &tpath[k].index, iblock);
+
+		tpath[k].depth = depth;
+		tpath[k].extent = nil;
+
+		assert(tpath[k].index != 0);
+
+		/* Load information for the next iteration */
+		u64int fblock =
+		    ext4_extent_index_get_fblock(tpath[k].index);
+
+		struct ext4_block block;
+		ret = ext4_trans_block_get(inode_ref->fs->bdev, &block, fblock);
+		if (ret != 0)
+			goto errout0;
+
+		if (!ext4_extent_block_verify(inode_ref, &block, depth - 1)) {
+			/* The block is not part of the path yet, so the
+			 * cleanup below would not see it: put it back
+			 * here */
+			ext4_block_set(inode_ref->fs->bdev, &block);
+			werrstr(Eio);
+			ret = -1;
+			goto errout0;
+		}
+
+		k--;
+
+		eh = (struct ext4_extent_header *)block.data;
+		tpath[k].block = block;
+		tpath[k].header = eh;
+	}
+
+	tpath[k].depth = 0;
+	tpath[k].extent = nil;
+	tpath[k].index = nil;
+
+	/* Find extent in the leaf node */
+	ext4_extent_binsearch(tpath[k].header, &tpath[k].extent,
+			      iblock);
+	*ppath = tpath;
+
+	return 0;
+
+errout0:
+	/* Put loaded blocks */
+	ext4_extent_path_release(inode_ref, tpath);
+
+	/* Destroy temporary data structure */
+	ext4_free(tpath);
+
+	return ret;
+}
+
+/**@brief Reload the paths in a cursor starting from the level having invalid
+ *        pointer
+ * @param inode_ref I-node the extent tree resides in
+ * @param path Path in the extent tree
+ * @param depth The level to start the reload at; must not be 0
+ * @param right Try to load the rightmost children
+ * @return 0 on success, -1 with errstr Eio on corrupted block, or return
+ *         values of ext4_block_set() and ext4_trans_block_get(). */
+int ext4_extent_reload_paths(struct ext4_inode_ref *inode_ref,
+	struct ext4_extent_path *path,
+	u16int depth,
+	bool right)
+{
+	struct ext4_extent_path *cur;
+	struct ext4_extent_path *child;
+	struct ext4_extent_header *hdr;
+	int ret;
+
+	/* actually we assume our caller starting from index level instead of
+	 * extent level */
+	assert(depth);
+
+	cur = path + depth;
+	hdr = cur->header;
+
+	/* XXX: the path becomes invalid at the first place... */
+	if (cur->index > EXT4_EXTENT_LAST_INDEX(hdr))
+		cur->index = EXT4_EXTENT_LAST_INDEX(hdr);
+
+	/* Walk from the given level toward the leaf, replacing the child of
+	 * every level on the way */
+	for (; cur > path; cur--) {
+		child = cur - 1;
+
+		/* Drop the buffer the child still holds, if any */
+		if (child->block.lb_id) {
+			ret = ext4_block_set(inode_ref->fs->bdev,
+					     &child->block);
+			if (ret != 0)
+				return ret;
+		}
+
+		/* Read the block the current index points at */
+		ret = ext4_trans_block_get(inode_ref->fs->bdev, &child->block,
+				ext4_extent_index_get_fblock(cur->index));
+		if (ret != 0)
+			return ret;
+
+		/* Validate the block content before moving on. */
+		if (!ext4_extent_block_verify(inode_ref, &child->block,
+					      cur->depth - 1)) {
+			werrstr(Eio);
+			return -1;
+		}
+
+		/* Point the child at the outermost entry on the wanted side */
+		hdr = (struct ext4_extent_header *)child->block.data;
+		child->header = hdr;
+		child->depth = ext4_extent_header_get_depth(hdr);
+		if (child->depth) {
+			child->index = right ? EXT4_EXTENT_LAST_INDEX(hdr)
+					     : EXT4_EXTENT_FIRST_INDEX(hdr);
+			child->extent = nil;
+		} else {
+			child->extent = right ? EXT4_EXTENT_LAST(hdr)
+					      : EXT4_EXTENT_FIRST(hdr);
+			child->index = nil;
+		}
+	}
+
+	return 0;
+}
+
+/**@brief Seek to the next extent
+ * @param inode_ref I-node the extent tree resides in
+ * @param path Path in the extent tree
+ * @param nonextp Output value - whether the current extent is the
+ *                right-most extent already; may be nil
+ * @return 0 on success, otherwise the return value of
+ *         ext4_extent_reload_paths(). */
+int ext4_extent_increment(struct ext4_inode_ref *inode_ref,
+	struct ext4_extent_path *path,
+	bool *nonextp)
+{
+	struct ext4_extent_path *cur = path;
+	u16int rootdepth = ext4_extent_tree_depth(inode_ref);
+	u16int level;
+	u16int slot;
+	bool nonext = true;
+	int ret = 0;
+
+	/* Climb from the leaf until a level is found whose pointer is not
+	 * on the last entry of its node */
+	for (level = 0; level <= rootdepth; level++, cur++) {
+		if (cur->depth)
+			slot = cur->index -
+			    EXT4_EXTENT_FIRST_INDEX(cur->header);
+		else
+			slot = cur->extent -
+			    EXT4_EXTENT_FIRST(cur->header);
+
+		if (slot < ext4_extent_header_get_nentries(cur->header) - 1)
+			break;
+	}
+
+	/* Running past the root means we are on the last extent already */
+	if (level <= rootdepth) {
+		/* Step right on the level that was found */
+		if (cur->depth)
+			cur->index++;
+		else
+			cur->extent++;
+
+		/* Everything below that level has to be loaded again,
+		 * starting with its leftmost child */
+		if (level != 0)
+			ret = ext4_extent_reload_paths(inode_ref, path,
+						       level, false);
+
+		/* Found the next extent */
+		if (ret == 0)
+			nonext = false;
+	}
+
+	if (nonextp)
+		*nonextp = nonext;
+
+	return ret;
+}
+
+/**@brief Seek to the previous extent
+ * @param inode_ref I-node the extent tree resides in
+ * @param path Path in the extent tree
+ * @param noprevp Output value - whether the current extent is the
+ *                left-most extent already; may be nil
+ * @return 0 on success, otherwise the return value of
+ *         ext4_extent_reload_paths(). */
+int
+ext4_extent_decrement(struct ext4_inode_ref *inode_ref,
+	struct ext4_extent_path *path,
+	bool *noprevp)
+{
+	int ret = 0;
+	u16int ptr;
+	bool noprev = true;
+	u16int depth = 0;
+	struct ext4_extent_path *p;
+	u16int rootdepth;
+
+	p = path;
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+
+	/* Iterate the paths from the leaf to the root */
+	while (depth <= rootdepth) {
+		if (p->depth) {
+			ptr = p->index -
+			    EXT4_EXTENT_FIRST_INDEX(p->header);
+		} else if (p->extent != nil) {
+			ptr = p->extent -
+			    EXT4_EXTENT_FIRST(p->header);
+		} else {
+			/* An empty leaf has no extent to step back from;
+			 * treat it like a pointer on the first slot rather
+			 * than computing a slot from a nil pointer */
+			ptr = 0;
+		}
+
+		if (ptr)
+			/* We found a path with non-leftmost pointer */
+			break;
+
+		/* Move to the parent path */
+		p++;
+		depth++;
+	}
+
+	/* If we can't find a path with a non-leftmost pointer,
+	 * we are already on the first extent, just return in this
+	 * case */
+	if (depth > rootdepth)
+		goto out;
+
+	/* Decrement the pointer once we found a path with non-leftmost
+	 * pointer */
+	if (p->depth)
+		p->index--;
+	else
+		p->extent--;
+
+	if (depth) {
+		/* We need to reload the paths to leaf if the path iterator
+		 * is not pointing to the leaf */
+		ret = ext4_extent_reload_paths(inode_ref, path, depth, true);
+		if (ret != 0)
+			goto out;
+	}
+
+	/* Found the previous extent */
+	noprev = false;
+out:
+	if (noprevp)
+		*noprevp = noprev;
+	return ret;
+}
+
+
+/**@brief Propagate the first key of each node on the path to the index
+ * entry of its parent, starting from the leaf
+ * @param inode_ref I-node the extent tree resides in
+ * @param path Path in the extent tree
+ * @param force when false, the walk stops at the first parent whose key
+ *              already matches its child; when true the walk continues
+ *              upwards as long as the child cursor is on the first slot.
+ *              Set this to true if insertion, deletion or modification of
+ *              the starting logical block of the first index in a node
+ *              is made at non-leaf level */
+static void ext4_extent_update_index(struct ext4_inode_ref *inode_ref,
+				     struct ext4_extent_path *path,
+				     bool force)
+{
+	u16int rootdepth;
+	struct ext4_extent_path *parent;
+
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+
+	/* Visit every level above the leaf, bottom-up */
+	for (parent = path + 1; parent <= path + rootdepth; parent++) {
+		struct ext4_extent_path *child = parent - 1;
+		ext4_lblk_t parentkey;
+		ext4_lblk_t childkey;
+		intptr slot;
+
+		/* Slot number of the cursor inside the child node */
+		if (child->depth)
+			slot = child->index -
+				EXT4_EXTENT_FIRST_INDEX(child->header);
+		else
+			slot = child->extent -
+				EXT4_EXTENT_FIRST(child->header);
+
+		/* A change that is not on the first slot of the child
+		 * cannot affect the key stored in the parent */
+		if (slot)
+			break;
+
+		/* The child is an index node when the parent is deeper
+		 * than level 1, otherwise it is the leaf */
+		parentkey = ext4_extent_index_get_iblock(parent->index);
+		if (parent->depth > 1)
+			childkey = ext4_extent_index_get_iblock(child->index);
+		else
+			childkey = ext4_extent_get_iblock(child->extent);
+
+		if (parentkey != childkey) {
+			/* The first key of the child changed: mirror it
+			 * in the index entry pointing to the child */
+			ext4_extent_index_set_iblock(parent->index, childkey);
+			ext4_extent_path_dirty(inode_ref, path,
+					       parent->depth);
+			continue;
+		}
+
+		/* Keys agree; without force there is nothing left to do */
+		if (!force)
+			break;
+	}
+}
+
+/**@brief Make the tree grow up by one level
+ *
+ * The content of the root stored in the i-node is copied into the block
+ * @newfblock. The root is then rewritten as an index node with a single
+ * entry pointing to that block. The path entry of the old root is
+ * redirected to the new block, and the path entry right after it is
+ * filled in to describe the new root.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path Path in the extent tree
+ * @param newfblock The newly allocated block for tree growth
+ * @return 0 on success, otherwise the return value of
+ *         ext4_trans_block_get_noread() */
+static int ext4_extent_grow_tree(struct ext4_inode_ref *inode_ref,
+				 struct ext4_extent_path *path,
+				 ext4_fsblk_t newfblock)
+{
+	int rc;
+	u16int slot;
+	u16int capacity;
+	u16int olddepth;
+	u32int blocksz;
+	ext4_lblk_t firstkey;
+	struct ext4_block newblk;
+	struct ext4_block inoblk;
+	struct ext4_extent_header *inohdr;
+	struct ext4_extent_path *oldroot;
+	struct ext4_extent_path *newroot;
+
+	olddepth = ext4_extent_tree_depth(inode_ref);
+	blocksz = ext4_sb_get_block_size(&inode_ref->fs->sb);
+	oldroot = path + olddepth;
+	newroot = oldroot + 1;
+
+	/* Keep the block and header of the current root; they will
+	 * describe the new root once the old one has moved */
+	inoblk = oldroot->block;
+	inohdr = oldroot->header;
+
+	/* Remember the slot number of the cursor so that the pointer can
+	 * be rebuilt relative to the new block */
+	if (olddepth)
+		slot = oldroot->index -
+			EXT4_EXTENT_FIRST_INDEX(oldroot->header);
+	else
+		slot = oldroot->extent -
+			EXT4_EXTENT_FIRST(oldroot->header);
+
+	/* Get a buffer for the newly allocated block */
+	rc = ext4_trans_block_get_noread(inode_ref->fs->bdev, &newblk,
+					 newfblock);
+	if (rc != 0)
+		return rc;
+
+	/* Clear the new block, then copy the old root into it */
+	memset(newblk.data, 0, blocksz);
+	memcpy(newblk.data, inode_ref->inode->blocks,
+	       EXT4_INODE_BLOCKS * sizeof(u32int));
+
+	/* Redirect the path entry of the old root to the new block */
+	oldroot->block = newblk;
+	oldroot->header = (struct ext4_extent_header *)newblk.data;
+	if (oldroot->depth) {
+		struct ext4_extent_index *first =
+			EXT4_EXTENT_FIRST_INDEX(oldroot->header);
+
+		oldroot->index = first + slot;
+		oldroot->extent = nil;
+		capacity = (blocksz - sizeof(struct ext4_extent_header)) /
+			sizeof(struct ext4_extent_index);
+		firstkey = ext4_extent_index_get_iblock(first);
+	} else {
+		struct ext4_extent *first =
+			EXT4_EXTENT_FIRST(oldroot->header);
+
+		oldroot->extent = first + slot;
+		oldroot->index = nil;
+		capacity = (blocksz - sizeof(struct ext4_extent_header)) /
+			sizeof(struct ext4_extent);
+		firstkey = ext4_extent_get_iblock(first);
+	}
+
+	/* The moved node now lives in a full block, so raise its limit */
+	ext4_extent_header_set_max_nentries(oldroot->header, capacity);
+
+	/* Describe the new root in the path */
+	newroot->depth = olddepth + 1;
+	newroot->block = inoblk;
+	newroot->header = inohdr;
+	newroot->extent = nil;
+	newroot->index = EXT4_EXTENT_FIRST_INDEX(newroot->header);
+
+	/* Turn the root into an index node with one entry that points
+	 * to the block holding the old root */
+	ext4_extent_header_set_depth(newroot->header, newroot->depth);
+	ext4_extent_header_set_nentries(newroot->header, 1);
+	ext4_extent_index_set_iblock(newroot->index, firstkey);
+	ext4_extent_index_set_fblock(newroot->index, newfblock);
+
+	/* Since the new root belongs to the on-disk inode,
+	 * we don't do checksum here */
+	inode_ref->dirty = true;
+
+	ext4_extent_path_dirty(inode_ref, path, oldroot->depth);
+
+	return 0;
+}
+
+/**@brief Do splitting on the tree if the leaf is full
+ *
+ * Starting from the leaf, every node on the path that has no room left is
+ * split in two halves; the right half goes to a newly allocated block and
+ * an index entry for it is added to the parent. When every level up to
+ * and including the root is full, the tree is grown by one level first.
+ * The path is updated so that it keeps pointing to the same entries.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path Path in the extent tree for possible splitting
+ * @param nslots number of entries that will be inserted to the
+ * leaf in future.
+ * @return 0 on success (including when nothing had to be split), -1 with
+ * the error string set when the bookkeeping array cannot be allocated,
+ * otherwise the return value of the failing block routine. On failure all
+ * blocks allocated by this call are freed again. */
+static int ext4_extent_split(struct ext4_inode_ref *inode_ref,
+			     struct ext4_extent_path *path,
+			     u16int nslots)
+{
+	int ret;
+	u16int i;
+	ext4_fsblk_t goal;
+	u16int rootdepth;
+	struct ext4_extent_path *p;
+	u32int blocksz;
+	/* Number of new blocks to be allocated */
+	u16int nnewfblocks = 0;
+	/* Number of node to be split */
+	u16int nsplits = 0;
+	/* Array of new blocks allocated */
+	ext4_fsblk_t *newfblocks;
+	/* The index of the right block inserted last time */
+	ext4_lblk_t lastiblock = 0;
+	/* Whether we updated child path to point to the right block
+	 * at the previous round during splitting */
+	bool prevrblock = false;
+
+	blocksz = ext4_sb_get_block_size(&inode_ref->fs->sb);
+	rootdepth = ext4_extent_tree_depth(inode_ref);
+	goal = ext4_extent_tree_alloc_goal(inode_ref);
+
+	/* First calculate how many levels will be touched */
+	for (p = path; p <= path + rootdepth; p++) {
+		u16int entries =
+			ext4_extent_header_get_nentries(p->header);
+		u16int limit =
+			ext4_extent_header_get_max_nentries(p->header);
+
+		assert(entries <= limit);
+		if (!p->depth) {
+			/* The leaf needs room for all requested slots */
+			if (entries + nslots <= limit)
+				break;
+		} else {
+			/* An index node needs room for one more entry */
+			if (entries < limit)
+				break;
+		}
+		/* We have to split a node when the tree is full */
+		nnewfblocks++;
+		nsplits++;
+	}
+
+	if (!nnewfblocks)
+		return 0;
+
+	/* Allocate the array for storing newly allocated blocks */
+	newfblocks = ext4_malloc(sizeof(ext4_fsblk_t) * nnewfblocks);
+	if (!newfblocks) {
+		werrstr(Enomem);
+		return -1;
+	}
+
+	for (i = 0; i < nnewfblocks; i++) {
+		ret = ext4_balloc_alloc_block(inode_ref, goal, newfblocks + i);
+		if (ret != 0) {
+			/* Only the first i entries hold allocated blocks.
+			 * Trim the count so that the cleanup below frees
+			 * exactly those, together with the array itself,
+			 * instead of leaking them. */
+			nnewfblocks = i;
+			goto finish;
+		}
+	}
+
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "nnewfblocks: %uhd rootdepth: %uhd\n",
+		 nnewfblocks, rootdepth);
+
+	/* If number of blocks to be allocated is greater than
+	 * the depth of root we have to grow the tree */
+	if (nnewfblocks == rootdepth + 1) {
+		ext4_dbg(DEBUG_EXTENT, "Growing: \n");
+		/* The block reserved for the root level is consumed by
+		 * the growth, not by a split */
+		nsplits--;
+
+		ret = ext4_extent_grow_tree(inode_ref,
+					    path, newfblocks[rootdepth]);
+		if (ret != 0)
+			goto finish;
+
+		ext4_extent_print_path(inode_ref, path);
+
+		/* If we are moving the in-inode leaf to on-block leaf.
+		 * we do not need further actions. */
+		if (!rootdepth)
+			goto finish;
+
+		++rootdepth; USED(rootdepth);
+	}
+
+	/* Start splitting */
+	p = path;
+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "Start splitting: \n");
+	for (i = 0; i < nsplits; i++, p++) {
+		struct ext4_extent_header *header;
+		u16int entries =
+			ext4_extent_header_get_nentries(p->header);
+		u16int limit =
+			ext4_extent_header_get_max_nentries(p->header);
+		/* The entry we start shifting to the right block */
+		u16int split_ptr = entries / 2;
+		/* The number of entry the right block will have */
+		u16int right_entries = entries - split_ptr;
+		/* The current entry */
+		u16int curr_ptr;
+		ext4_lblk_t riblock;
+		struct ext4_block block;
+
+		ret = ext4_trans_block_get_noread(inode_ref->fs->bdev,
+						  &block, newfblocks[i]);
+		if (ret != 0)
+			goto finish;
+
+		/* Initialize newly allocated block and remember it */
+		memset(block.data, 0, blocksz);
+
+		header = (void *)block.data;
+
+		/* Initialize on-disk structure (header) */
+		ext4_extent_header_set_nentries(header,
+						right_entries);
+		ext4_extent_header_set_max_nentries(header, limit);
+		ext4_extent_header_set_magic(header, EXT4_EXTENT_MAGIC);
+		ext4_extent_header_set_depth(header, p->depth);
+		ext4_extent_header_set_generation(header, 0);
+
+		/* Move some entries from old block to new block */
+		if (p->depth) {
+			struct ext4_extent_index *left_index =
+				EXT4_EXTENT_FIRST_INDEX(p->header);
+			struct ext4_extent_index *split_index =
+				left_index + split_ptr;
+
+			riblock = ext4_extent_index_get_iblock(split_index);
+			ext4_dbg(DEBUG_EXTENT,
+				 DBG_INFO "depth: %ud, riblock: %ud\n",
+				 p->depth, riblock);
+
+			curr_ptr = p->index - left_index;
+
+			memcpy(EXT4_EXTENT_FIRST_INDEX(header),
+			       split_index,
+			       right_entries * EXT4_EXTENT_INDEX_SIZE);
+			memset(split_index, 0,
+			       right_entries * EXT4_EXTENT_INDEX_SIZE);
+		} else {
+			struct ext4_extent *left_extent =
+				EXT4_EXTENT_FIRST(p->header);
+			struct ext4_extent *split_extent =
+				left_extent + split_ptr;
+
+			riblock = ext4_extent_get_iblock(split_extent);
+			ext4_dbg(DEBUG_EXTENT,
+				 DBG_INFO "depth: %ud, riblock: %ud\n",
+				 p->depth, riblock);
+
+			curr_ptr = p->extent - left_extent;
+
+			memcpy(EXT4_EXTENT_FIRST(header),
+			       split_extent,
+			       right_entries * EXT4_EXTENT_SIZE);
+			memset(split_extent, 0,
+			       right_entries * EXT4_EXTENT_SIZE);
+		}
+
+		/* Set entries count in left node */
+		ext4_extent_header_set_nentries(p->header,
+						entries - right_entries);
+
+		/* Decide whether we need to update the path to
+		 * point to right block or not */
+		if (curr_ptr >= split_ptr) {
+			/* Update the checksum for the left block */
+			ext4_extent_path_dirty(inode_ref, path, p->depth);
+
+			/* Put back the left block */
+			ret = ext4_block_set(inode_ref->fs->bdev,
+					     &p->block);
+			if (ret != 0)
+				goto finish;
+
+			/* Update pointers in extent path structure to
+			 * point to right block */
+			p->block = block;
+			p->header = (void *)block.data;
+
+			if (p->depth) {
+				p->index =
+					EXT4_EXTENT_FIRST_INDEX(p->header) +
+					curr_ptr - split_ptr;
+			} else {
+				p->extent =
+					EXT4_EXTENT_FIRST(p->header) +
+					curr_ptr - split_ptr;
+			}
+		} else {
+			/* Update the checksum for the right block */
+			ext4_extent_block_csum_set(inode_ref, header);
+			ext4_trans_set_block_dirty(block.buf);
+
+			/* Put back the right block */
+			ret = ext4_block_set(inode_ref->fs->bdev,
+					     &block);
+			if (ret != 0)
+				goto finish;
+		}
+
+		/* Append an index after the current index. This links in
+		 * the right block created for the level below during the
+		 * previous round, hence newfblocks[i - 1]; the leaf
+		 * (i == 0) has depth 0 and never gets here. */
+		if (p->depth) {
+			struct ext4_extent_index *index = p->index + 1;
+
+			/* If we updated the path to right block in the previous
+			 * round, we update the pointer in the path to point to
+			 * the right block */
+			if (prevrblock)
+				p->index++;
+
+			if (index <= EXT4_EXTENT_LAST_INDEX(p->header)) {
+				u16int nindex =
+					EXT4_EXTENT_LAST_INDEX(p->header) -
+					index + 1;
+
+				memmove(index + 1,
+					index,
+					nindex * EXT4_EXTENT_INDEX_SIZE);
+			}
+			memset(index, 0, EXT4_EXTENT_INDEX_SIZE);
+			ext4_extent_index_set_iblock(index, lastiblock);
+			ext4_extent_index_set_fblock(index, newfblocks[i - 1]);
+
+			entries = ext4_extent_header_get_nentries(p->header);
+			ext4_extent_header_set_nentries(p->header,
+							entries + 1);
+		}
+
+		ext4_extent_path_dirty(inode_ref, path, p->depth);
+
+		/* We may have updated the path to right block in this round */
+		prevrblock = curr_ptr >= split_ptr;
+
+		/* We also update the lastiblock variable to the index of the
+		 * right block */
+		lastiblock = riblock;
+	}
+
+	/* Append an index after the current index. p now points to the
+	 * first level that was not split; it receives the entry for the
+	 * right block of the last split. */
+	if (p->depth) {
+		struct ext4_extent_index *index = p->index + 1;
+		u16int entries =
+			ext4_extent_header_get_nentries(p->header);
+
+		/* If we updated the path to right block in the previous
+		 * round, we update the pointer in the path to point to
+		 * the right block */
+		if (prevrblock)
+			p->index++;
+
+		if (index <= EXT4_EXTENT_LAST_INDEX(p->header)) {
+			u16int nindex =
+				EXT4_EXTENT_LAST_INDEX(p->header) -
+				index + 1;
+
+			memmove(index + 1,
+				index,
+				nindex * EXT4_EXTENT_INDEX_SIZE);
+		}
+		memset(index, 0, EXT4_EXTENT_INDEX_SIZE);
+		ext4_extent_index_set_iblock(index, lastiblock);
+		ext4_extent_index_set_fblock(index, newfblocks[i - 1]);
+		ext4_extent_header_set_nentries(p->header,
+						entries + 1);
+
+		ext4_extent_path_dirty(inode_ref, path, p->depth);
+	}
+
+	ret = 0;
+finish:
+	/* On failure give back every block recorded in the array */
+	if (ret != 0)
+		for (i = 0; i < nnewfblocks; i++)
+			ext4_balloc_free_block(inode_ref, newfblocks[i]);
+
+	ext4_free(newfblocks);
+	return ret;
+}
+
+/**@brief Insert an extent into the leaf the path points to
+ *
+ * Room is made first by splitting (and possibly growing) the tree. The
+ * new extent is placed after the current extent when its starting
+ * logical block is greater, otherwise at the current position; on return
+ * the leaf cursor points to the inserted extent.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path Path in the extent tree for possible splitting
+ * @param ext Extent to be inserted
+ * @return 0 on success, otherwise the return value of
+ *         ext4_extent_split() */
+static int ext4_extent_insert(struct ext4_inode_ref *inode_ref,
+			      struct ext4_extent_path *path,
+			      struct ext4_extent *ext)
+{
+	int ret;
+	u16int count;
+	u16int nshift;
+	struct ext4_extent_path *leaf;
+	struct ext4_extent *lastext;
+
+	/* Make sure the leaf has a free slot */
+	ret = ext4_extent_split(inode_ref, path, 1);
+	if (ret != 0)
+		return ret;
+
+	leaf = path;
+	count = ext4_extent_header_get_nentries(leaf->header);
+
+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "After splitting: \n");
+	ext4_extent_print_path(inode_ref, path);
+
+	/* Choose the slot: the first one when the leaf has no current
+	 * extent, otherwise before or after the current extent depending
+	 * on the starting logical blocks */
+	if (leaf->extent == nil)
+		leaf->extent = EXT4_EXTENT_FIRST(leaf->header);
+	else if (ext4_extent_get_iblock(ext) >
+		 ext4_extent_get_iblock(leaf->extent))
+		leaf->extent++;
+
+	/* Shift the extents at and after the slot one position right */
+	lastext = EXT4_EXTENT_LAST(leaf->header);
+	if (leaf->extent <= lastext) {
+		nshift = lastext - leaf->extent + 1;
+
+		ext4_dbg(DEBUG_EXTENT,
+			 DBG_INFO "%uhd extents to be shifted at leaf\n",
+			 nshift);
+
+		memmove(leaf->extent + 1, leaf->extent,
+			nshift * EXT4_EXTENT_SIZE);
+	}
+
+	/* Store the extent and account for it in the header */
+	memcpy(leaf->extent, ext, EXT4_EXTENT_SIZE);
+	ext4_extent_header_set_nentries(leaf->header, count + 1);
+
+	ext4_extent_path_dirty(inode_ref, path, leaf->depth);
+
+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "Before updating indice: \n");
+	ext4_extent_print_path(inode_ref, path);
+
+	/* The insertion may have changed the first key of the leaf, so
+	 * let the parents pick it up */
+	ext4_extent_update_index(inode_ref, path, false);
+
+	ext4_dbg(DEBUG_EXTENT, DBG_INFO "At the end: \n");
+	ext4_extent_print_path(inode_ref, path);
+
+	return 0;
+}
+
+/**@brief Delete the item the path points to in the node at level @depth
+ *
+ * The entries following the deleted one are shifted one slot to the
+ * left, the vacated last slot is cleared, the entry count in the node
+ * header is decremented and the node is marked dirty. The node must
+ * hold at least one entry. The cursor in the path is not moved, so
+ * afterwards it refers to the entry that followed the deleted one, or
+ * to the slot just past the last entry.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path Path in the extent tree
+ * @param depth The level of the node to be operated on */
+static void
+ext4_extent_delete_item(struct ext4_inode_ref *inode_ref,
+			struct ext4_extent_path *path,
+			u16int depth)
+{
+	u16int nitems;
+	struct ext4_extent_header *hdr;
+	struct ext4_extent_path *p;
+
+	p = path + depth;
+
+	hdr = p->header;
+	assert(ext4_extent_header_get_nentries(hdr));
+
+	if (p->depth) {
+		struct ext4_extent_index *idx;
+
+		idx = p->index;
+		/* Number of index entries to the right of the cursor */
+		nitems = EXT4_EXTENT_LAST_INDEX(hdr) - (idx + 1) + 1;
+		if (nitems) {
+			memmove(idx, idx + 1,
+				nitems * EXT4_EXTENT_INDEX_SIZE);
+			/* Clear the last slot using the index accessor,
+			 * so that this branch does not depend on extents
+			 * and indices having the same size */
+			memset(EXT4_EXTENT_LAST_INDEX(hdr), 0,
+			       EXT4_EXTENT_INDEX_SIZE);
+		} else {
+			memset(idx, 0, EXT4_EXTENT_INDEX_SIZE);
+		}
+	} else {
+		struct ext4_extent *ext;
+
+		ext = p->extent;
+		/* Number of extents to the right of the cursor */
+		nitems = EXT4_EXTENT_LAST(hdr) - (ext + 1) + 1;
+		if (nitems) {
+			memmove(ext, ext + 1,
+				nitems * EXT4_EXTENT_SIZE);
+			memset(EXT4_EXTENT_LAST(hdr), 0,
+			       EXT4_EXTENT_SIZE);
+		} else {
+			memset(ext, 0, EXT4_EXTENT_SIZE);
+		}
+	}
+
+	/* The header still counts the deleted entry up to this point */
+	nitems = ext4_extent_header_get_nentries(hdr) - 1;
+	ext4_extent_header_set_nentries(hdr,
+					nitems);
+	ext4_extent_path_dirty(inode_ref, path, p->depth);
+}
+
+/**@brief Remove extents from the leaf, starting at the current extent,
+ *        as long as their key is less than or equal to @toiblock.
+ *
+ * An extent that lies completely at or below @toiblock is deleted and
+ * its blocks are freed. An extent that only begins at or below @toiblock
+ * is shortened from the front instead.
+ * @param inode_ref I-node the tree resides in
+ * @param path Path in the extent tree
+ * @param toiblock The logical block
+ * @param stopp Output value to tell whether the caller should
+ * stop deletion. Set to true if an extent reaching beyond @toiblock is
+ * met, and to false if the leaf ran out of extents first.
+ * @return always 0; the results of the block freeing routine are not
+ * examined. */
+static int
+ext4_extent_delete_leaf(struct ext4_inode_ref *inode_ref,
+			struct ext4_extent_path *path,
+			ext4_lblk_t toiblock,
+			bool *stopp)
+{
+	struct ext4_extent_path *leaf;
+
+	leaf = path;
+	*stopp = false;
+
+	for (;;) {
+		struct ext4_extent *cur;
+		u16int remaining;
+		u16int slot;
+		u16int len;
+		u16int flen;
+		bool unwritten;
+		ext4_lblk_t first;
+		ext4_lblk_t last;
+		ext4_fsblk_t phys;
+
+		remaining = ext4_extent_header_get_nentries(leaf->header);
+		slot = leaf->extent - EXT4_EXTENT_FIRST(leaf->header);
+
+		assert(remaining > 0);
+
+		/* Range covered by the current extent */
+		cur = leaf->extent;
+		phys = ext4_extent_get_fblock(cur);
+		first = ext4_extent_get_iblock(cur);
+		last = first + ext4_extent_get_nblocks(cur) - 1;
+		len = last - first + 1;
+
+		/* The extent starts beyond @toiblock: nothing more to
+		 * remove, and the caller has to stop as well */
+		if (toiblock < first) {
+			*stopp = true;
+			break;
+		}
+
+		if (toiblock < last) {
+			/* @toiblock falls inside the extent, so only its
+			 * head goes away and the extent itself stays */
+			unwritten = EXT4_EXT_IS_UNWRITTEN(cur);
+
+			/* Unmap the blocks of the head */
+			flen = toiblock - first + 1;
+			ext4_dbg(DEBUG_EXTENT,
+				 DBG_INFO "Freeing: %llud:%uhd\n",
+				 phys, flen);
+			ext4_balloc_free_blocks(inode_ref, phys, flen);
+
+			/* Let the extent begin right after @toiblock */
+			phys += flen;
+			first = toiblock + 1;
+			len = last - first + 1;
+			ext4_extent_set_iblock(cur, first);
+			ext4_extent_set_nblocks(cur, len, unwritten);
+			ext4_extent_set_fblock(cur, phys);
+
+			ext4_extent_path_dirty(inode_ref, path, leaf->depth);
+
+			*stopp = true;
+			break;
+		}
+
+		/* The whole extent is covered: drop it from the leaf... */
+		ext4_extent_delete_item(inode_ref, path, 0);
+		remaining--;
+
+		/* ...and unmap the blocks it referred to */
+		flen = len;
+		ext4_dbg(DEBUG_EXTENT,
+			 DBG_INFO "Freeing: %llud:%uhd\n",
+			 phys, flen);
+		ext4_balloc_free_blocks(inode_ref, phys, flen);
+
+		/* The cursor moved past the last extent of the leaf */
+		if (slot >= remaining)
+			break;
+	}
+	return 0;
+}
+
+/**@brief Remove the current index at the specified level and free the
+ *        block it points to.
+ *
+ * This is used once a child node has been left without entries: its
+ * index is deleted from the parent node and its block is released.
+ * @param inode_ref I-node the extent tree resides in
+ * @param path Path in the extent tree
+ * @param depth The level where deletion takes place at
+ * @return always 0; the result of the block freeing routine is not
+ * examined. */
+static int
+ext4_extent_delete_node(struct ext4_inode_ref *inode_ref,
+			struct ext4_extent_path *path,
+			u16int depth)
+{
+	ext4_fsblk_t childblk;
+	struct ext4_extent_path *node;
+
+	node = path + depth;
+	assert(ext4_extent_header_get_nentries(node->header) > 0);
+
+	/* Read the block number first; deleting the index overwrites
+	 * the slot it is stored in */
+	childblk = ext4_extent_index_get_fblock(node->index);
+
+	/* Delete the index pointed to by the path. */
+	ext4_extent_delete_item(inode_ref, path, depth);
+
+	/* Free the block the index referred to. */
+	ext4_dbg(DEBUG_EXTENT,
+		 DBG_INFO "Freeing: %llud:%uhd\n",
+		 childblk, 1);
+	ext4_balloc_free_blocks(inode_ref, childblk, 1);
+
+	return 0;
+}
+
+/**@brief Delete the mapping in extent tree starting from \p fromiblock to
+ * \p toiblock inclusively.
+ *
+ * The extent at or before \p fromiblock is looked up first. If the range
+ * starts inside that extent, the extent is shortened (and, when the range
+ * also ends inside it, the remaining tail is inserted as a new extent).
+ * After that, whole extents are deleted leaf by leaf, and index nodes
+ * left without entries are deleted together with their blocks.
+ * @param inode_ref I-node the extent tree resides in
+ * @param fromiblock First logical block to be unmapped
+ * @param toiblock Last logical block to be unmapped
+ * @return 0 on success (also when the tree is empty or no extent lies in
+ * the range), otherwise the return value of ext4_extent_find_extent(),
+ * ext4_extent_increment(), ext4_extent_insert(),
+ * ext4_extent_delete_leaf(), ext4_extent_delete_node() or
+ * ext4_extent_reload_paths(). The path obtained from the lookup is
+ * released and freed before returning.
+ */
+int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref,
+ ext4_lblk_t fromiblock,
+ ext4_lblk_t toiblock)
+{
+ int ret;
+ u16int nitems;
+ int rootdepth;
+ struct ext4_extent_header *hdr;
+ struct ext4_extent *ext;
+ ext4_lblk_t endiblock;
+ ext4_lblk_t startiblock;
+ struct ext4_extent_path *path, *p;
+
+ rootdepth = ext4_extent_tree_depth(inode_ref);
+
+ /* Look up the path leading to @fromiblock */
+ ret = ext4_extent_find_extent(inode_ref, fromiblock, &path);
+ if (ret != 0)
+ return ret;
+
+ p = path;
+ hdr = p->header; USED(hdr);
+
+ /* We return 0 even if the whole extent tree is empty. */
+ if (!ext4_extent_header_get_nentries(path->header))
+ goto out;
+
+ /* Calculate the last logical block of the current extent. */
+ ext4_dbg(DEBUG_EXTENT, DBG_INFO "At start of remove_space: \n");
+ ext4_extent_print_path(inode_ref, path);
+
+ ext = p->extent;
+ startiblock = ext4_extent_get_iblock(ext);
+ endiblock = startiblock + ext4_extent_get_nblocks(ext) - 1;
+
+ /* NOTE(review): @endiblock is an ext4_lblk_t but is printed with
+ * the short verb %uhd - confirm whether %ud was intended */
+ ext4_dbg(DEBUG_EXTENT,
+ DBG_INFO "Extent: %ud:%uhd\n",
+ startiblock, endiblock);
+
+ if (fromiblock > endiblock) {
+ bool nonext;
+
+ /* The last logical block of the current extent is smaller
+ * than the first logical block we are going to remove,
+ * thus we increment the extent pointer of the cursor. */
+
+ /* Increment the extent pointer to point to the
+ * next extent. */
+ ret = ext4_extent_increment(inode_ref, path, &nonext);
+ if (ret != 0)
+ goto out;
+
+ /* The current extent is already the last extent in
+ * the tree, so we just return success here. */
+ if (nonext)
+ goto out;
+ } else if (fromiblock > startiblock) {
+ bool unwritten;
+ u16int len;
+
+ /* @fromiblock is in the range of the current extent,
+ * but does not sit right on the starting block.
+ *
+ * In this case we need to modify the current extent
+ * and free some blocks, since we do not really want
+ * to remove and reinsert a new one. */
+
+ /* Keep only the part of the extent before @fromiblock */
+ len = fromiblock - startiblock;
+ unwritten = EXT4_EXT_IS_UNWRITTEN(ext);
+ ext4_extent_set_nblocks(ext, len, unwritten);
+
+ ext4_extent_path_dirty(inode_ref, path, p->depth);
+
+ /* Free the range of blocks starting from @fromiblock
+ * up to either @endiblock or @toiblock. */
+ if (toiblock < endiblock) {
+ u16int flen;
+ ext4_fsblk_t blocknr;
+ struct ext4_extent next;
+
+ /* In case we free up space inside an extent
+ * while not touching both ends, we need to
+ * unavoidably insert a new extent right after
+ * the modified current extent, and that may
+ * cause tree splitting. */
+
+ /* Now we need to free up space first. */
+ flen = toiblock - fromiblock + 1;
+ blocknr = ext4_extent_get_fblock(ext) + len;
+ ext4_dbg(DEBUG_EXTENT,
+ DBG_INFO "Freeing: %llud:%uhd\n",
+ blocknr, flen);
+ ext4_balloc_free_blocks(inode_ref, blocknr, flen);
+
+ /* Describe the tail that survives after @toiblock */
+ blocknr += flen;
+ startiblock = fromiblock + flen;
+ len = endiblock - startiblock + 1;
+
+ ext4_extent_set_iblock(&next, startiblock);
+ ext4_extent_set_nblocks(&next, len, unwritten);
+ ext4_extent_set_fblock(&next, blocknr);
+ ret = ext4_extent_insert(inode_ref, path, &next);
+
+ /* After we free up the space and insert a new
+ * extent, we are done. */
+ goto out;
+ } else {
+ bool nonext;
+ u16int flen;
+ ext4_fsblk_t blocknr;
+
+ /* Otherwise we do not need any insertion,
+ * which also means that no extra space may be
+ * allocated for tree splitting. */
+ flen = endiblock - fromiblock + 1;
+ blocknr = ext4_extent_get_fblock(ext) + len;
+
+ /* Now we need to free up space first. */
+ ext4_dbg(DEBUG_EXTENT,
+ DBG_INFO "Freeing: %llud:%uhd\n",
+ blocknr, flen);
+ ext4_balloc_free_blocks(inode_ref, blocknr, flen);
+
+ /* Increment the extent pointer to point to the
+ * next extent. */
+ ret = ext4_extent_increment(inode_ref, path, &nonext);
+ if (ret != 0 || nonext)
+ goto out;
+ }
+ }
+
+ /* From here on the cursor sits on the first extent that may have
+ * to be removed as a whole. @p walks up and down the path: it is
+ * at the leaf while extents are deleted and moves up whenever a
+ * node has been exhausted. */
+ while (p <= path + rootdepth) {
+ struct ext4_extent_path *chldp;
+
+ hdr = p->header;
+
+ if (!p->depth) {
+ bool stop;
+
+ /* Delete as many extents as we can. */
+ ret = ext4_extent_delete_leaf(inode_ref,
+ path,
+ toiblock,
+ &stop);
+ if (ret != 0)
+ goto out;
+
+ if (stop) {
+ /* Since the current extent has its logical
+ * block number greater than @toiblock,
+ * we are done. */
+ break;
+ }
+ /* Since there are no more items in the leaf,
+ * we have to go one level above to switch to the
+ * next leaf. */
+ p++;
+ continue;
+ }
+
+ chldp = p - 1;
+ nitems = ext4_extent_header_get_nentries(chldp->header);
+
+ /* Now we don't need the children path anymore.
+ * NOTE(review): the return value of ext4_block_set() is
+ * ignored here - confirm that this is intended */
+ ext4_block_set(inode_ref->fs->bdev, &chldp->block);
+ if (!nitems) {
+ /* The child node is empty: delete its index from this
+ * node and free its block */
+ ret = ext4_extent_delete_node(inode_ref, path, p->depth);
+ if (ret != 0)
+ goto out;
+
+ if (p->index > EXT4_EXTENT_LAST_INDEX(hdr)) {
+ /* Go one level above */
+ p++;
+ } else {
+ /* The cursor now refers to the index that followed
+ * the deleted one; load the nodes below it */
+ ret = ext4_extent_reload_paths(inode_ref, path, p->depth, false);
+ if (ret != 0)
+ goto out;
+ /* Go to the bottom level (aka the leaf). */
+ p = path;
+ }
+ } else {
+ /* The child node still has entries, so it is kept */
+ if (p->index == EXT4_EXTENT_LAST_INDEX(hdr)) {
+ /* Go one level above */
+ p++;
+ } else {
+ /* Advance to the next index and load the nodes
+ * below it */
+ p->index++;
+ ret = ext4_extent_reload_paths(inode_ref, path, p->depth, false);
+ if (ret != 0)
+ goto out;
+ /* Go to the bottom level (aka the leaf). */
+ p = path;
+ }
+ }
+ }
+
+ /* The above code can only exit in either situations:
+ *
+ * 1. We found that there are no more extents at the right
+ * (p > path + rootdepth)
+ * 2. We found that the next extent has key larger than @toiblock
+ * (p at leaf, i.e. p == path) */
+ assert(p == path || p > path + rootdepth);
+ if (p == path) {
+ /* We might have removed the leftmost key in the node,
+ * so we need to update the first key of the right
+ * sibling at every level until we meet a non-leftmost
+ * key. */
+ ext4_extent_update_index(inode_ref, path, true);
+ } else {
+ /* Put loaded blocks. We won't double-release
+ * in this case since the depth of tree will
+ * be reset to 0.
+ * NOTE(review): the depth is only reset below when the
+ * root has no entries left, and the path is released a
+ * second time at the out label - confirm that
+ * ext4_extent_path_release() tolerates being called twice
+ * when the root still has entries */
+ ext4_extent_path_release(inode_ref, path);
+
+ hdr = ext4_inode_get_extent_header(inode_ref->inode);
+ if (!ext4_extent_header_get_nentries(hdr)) {
+ /* For empty root we need to make sure that the
+ * depth of the root level is 0. */
+ ext4_extent_header_set_nentries(hdr, 0);
+ ext4_extent_header_set_depth(hdr, 0);
+ inode_ref->dirty = true;
+ }
+ }
+
+out:
+ /* Put loaded blocks */
+ ext4_extent_path_release(inode_ref, path);
+
+ /* Destroy temporary data structure */
+ ext4_free(path);
+
+ return ret;
+}
+
+/**@brief Fill a run of physical blocks with zeroes
+ *
+ * Each block is obtained without reading it from the device, cleared,
+ * marked dirty and put back. Processing stops at the first failure.
+ * @param inode_ref I-node
+ * @param fblock starting block number to be zeroed
+ * @param nblocks number of blocks to be zeroed
+ * @return 0 on success, otherwise the return value of
+ *         ext4_trans_block_get_noread() or ext4_block_set() */
+static int ext4_extent_zero_fblocks(struct ext4_inode_ref *inode_ref,
+				    ext4_fsblk_t fblock,
+				    ext4_lblk_t nblocks)
+{
+	int rc;
+	u32int blocksz;
+	ext4_lblk_t done;
+
+	blocksz = ext4_sb_get_block_size(&inode_ref->fs->sb);
+
+	for (done = 0; done < nblocks; done++) {
+		struct ext4_block blk = EXT4_BLOCK_ZERO();
+
+		/* The old content is irrelevant, so skip reading it */
+		rc = ext4_trans_block_get_noread(inode_ref->fs->bdev, &blk,
+						 fblock + done);
+		if (rc != 0)
+			return rc;
+
+		memset(blk.data, 0, blocksz);
+		ext4_trans_set_block_dirty(blk.buf);
+
+		rc = ext4_block_set(inode_ref->fs->bdev, &blk);
+		if (rc != 0)
+			return rc;
+	}
+
+	return 0;
+}
+
+/**@brief Convert unwritten mapping to written one
+ *
+ * The range [@iblock, @iblock + @nblocks) must start inside the extent
+ * the leaf of @path currently points to. The blocks of the range are
+ * zeroed and the extent is split as needed so that exactly the range
+ * becomes a written extent while the rest stays unwritten.
+ * @param inode_ref I-node
+ * @param path Path in the extent tree
+ * @param iblock starting logical block to be converted
+ * @param nblocks number of blocks to be converted
+ * @return 0 on success, otherwise the return value of
+ * ext4_extent_zero_fblocks(), ext4_extent_split() or
+ * ext4_extent_insert() */
+int ext4_extent_convert_written(struct ext4_inode_ref *inode_ref,
+				struct ext4_extent_path *path,
+				ext4_lblk_t iblock,
+				ext4_lblk_t nblocks)
+{
+	int ret;
+	ext4_lblk_t eiblock;
+	ext4_lblk_t enblocks;
+	ext4_fsblk_t efblock;
+	struct ext4_extent *ext;
+
+	ext = path[0].extent;
+	assert(ext);
+
+	eiblock = ext4_extent_get_iblock(ext);
+	enblocks = ext4_extent_get_nblocks(ext);
+	efblock = ext4_extent_get_fblock(ext);
+	assert(EXT4_EXTENT_IN_RANGE(iblock, eiblock, enblocks));
+
+	/* There are four cases we need to handle */
+	if (iblock == eiblock && nblocks == enblocks) {
+		/* Case 1: the whole extent has to be converted.
+		 * This is the simplest scenario. We just need
+		 * to mark the extent "written", and zero the
+		 * blocks covered by the extent */
+		ret = ext4_extent_zero_fblocks(inode_ref, efblock, enblocks);
+		if (ret != 0)
+			return ret;
+		EXT4_EXT_SET_WRITTEN(ext);
+		ext4_extent_path_dirty(inode_ref, path, 0);
+	} else if (iblock == eiblock) {
+		/* Case 2: convert the first part of the extent to written
+		 * and insert an unwritten extent after that */
+		ext4_lblk_t newiblock;
+		ext4_lblk_t newnblocks;
+		ext4_fsblk_t newfblock;
+		struct ext4_extent insext;
+
+		/* The new extent we are going to insert */
+		newiblock = eiblock + nblocks;
+		newnblocks = eiblock + enblocks - newiblock;
+		newfblock = efblock + nblocks;
+
+		/* Zero the blocks covered by the first part of the extent */
+		ret = ext4_extent_zero_fblocks(inode_ref,
+					       efblock + iblock - eiblock,
+					       nblocks);
+		if (ret != 0)
+			return ret;
+
+		/* Trim the current extent to the converted head and mark
+		 * it written. The head is @nblocks long; the remaining
+		 * enblocks - nblocks blocks belong to the extent inserted
+		 * below, so using that length here would make the two
+		 * extents overlap. */
+		ext4_extent_set_nblocks(ext, nblocks, false);
+		ext4_extent_path_dirty(inode_ref, path, 0);
+
+		/* Insert the new extent */
+		ext4_extent_set_iblock(&insext, newiblock);
+		ext4_extent_set_nblocks(&insext, newnblocks, true);
+		ext4_extent_set_fblock(&insext, newfblock);
+		ret = ext4_extent_insert(inode_ref, path, &insext);
+		if (ret != 0) {
+			/* In case when something happens during insertion
+			 * we revert the trimming of the current extent,
+			 * restoring its original length. The pointer is
+			 * taken from the path again because splitting may
+			 * have moved the leaf to another block. */
+			ext = path[0].extent;
+			ext4_extent_set_nblocks(ext, enblocks, true);
+			ext4_extent_path_dirty(inode_ref, path, 0);
+		}
+	} else if (iblock + nblocks == eiblock + enblocks) {
+		/* Case 3: convert the second part of the extent to written.
+		 * We insert an written extent after the current extent */
+		ext4_lblk_t newiblock;
+		ext4_lblk_t newnblocks;
+		ext4_fsblk_t newfblock;
+		struct ext4_extent insext;
+
+		/* The new extent we are going to insert */
+		newiblock = iblock;
+		newnblocks = nblocks;
+		newfblock = efblock + iblock - eiblock;
+
+		/* Zero the blocks covered by the second part of the extent */
+		ret = ext4_extent_zero_fblocks(inode_ref, newfblock, newnblocks);
+		if (ret != 0)
+			return ret;
+
+		/* Trim the current extent */
+		ext4_extent_set_nblocks(ext, enblocks - nblocks, true);
+		ext4_extent_path_dirty(inode_ref, path, 0);
+
+		/* Insert the new extent */
+		ext4_extent_set_iblock(&insext, newiblock);
+		ext4_extent_set_nblocks(&insext, newnblocks, false);
+		ext4_extent_set_fblock(&insext, newfblock);
+		ret = ext4_extent_insert(inode_ref, path, &insext);
+		if (ret != 0) {
+			/* In case when something happens during insertion
+			 * we revert the trimming of the current extent,
+			 * restoring its original length. The pointer is
+			 * taken from the path again because splitting may
+			 * have moved the leaf to another block. */
+			ext = path[0].extent;
+			ext4_extent_set_nblocks(ext, enblocks, true);
+			ext4_extent_path_dirty(inode_ref, path, 0);
+		}
+	} else {
+		/* Case 4: convert the middle part of the extent to written.
+		 * We insert one written extent, follow by an unwritten
+		 * extent */
+		ext4_lblk_t newiblock[2];
+		ext4_lblk_t newnblocks[2];
+		ext4_fsblk_t newfblock[2];
+		struct ext4_extent insext;
+
+		/* The new extents we are going to insert */
+		newiblock[0] = iblock;
+		newnblocks[0] = nblocks;
+		newfblock[0] = efblock + iblock - eiblock;
+		newiblock[1] = iblock + nblocks;
+		newnblocks[1] = eiblock + enblocks - newiblock[1];
+		newfblock[1] = newfblock[0] + nblocks;
+
+		/* Zero the blocks covered by the written extent */
+		ret = ext4_extent_zero_fblocks(inode_ref, newfblock[0],
+					       newnblocks[0]);
+		if (ret != 0)
+			return ret;
+
+		/* We don't want to fail in the middle because we
+		 * run out of space. From now on the subsequent
+		 * insertions cannot fail */
+		ret = ext4_extent_split(inode_ref, path, 2);
+		if (ret != 0)
+			return ret;
+
+		/* Splitting or growing the tree may have moved the leaf
+		 * to another block, which leaves the pointer fetched at
+		 * the top stale; take the current one from the path */
+		ext = path[0].extent;
+
+		/* Trim the current extent */
+		ext4_extent_set_nblocks(ext,
+					enblocks - newnblocks[0] - newnblocks[1],
+					true);
+		ext4_extent_path_dirty(inode_ref, path, 0);
+
+		/* Insert the written extent first */
+		ext4_extent_set_iblock(&insext, newiblock[0]);
+		ext4_extent_set_nblocks(&insext, newnblocks[0], false);
+		ext4_extent_set_fblock(&insext, newfblock[0]);
+		ret = ext4_extent_insert(inode_ref, path, &insext);
+		assert(ret == 0);
+
+		/* Then insert the unwritten extent */
+		ext4_extent_set_iblock(&insext, newiblock[1]);
+		ext4_extent_set_nblocks(&insext , newnblocks[1], true);
+		ext4_extent_set_fblock(&insext, newfblock[1]);
+		ret = ext4_extent_insert(inode_ref, path, &insext);
+		assert(ret == 0);
+	}
+	return ret;
+}
+
+/**@brief Check if the second extent can be appended to the first extent
+ *
+ * Appending is possible only when both extents share the same
+ * written/unwritten state, \p ext2 begins exactly where \p ext ends
+ * (both logically and physically) and the combined length still
+ * fits into a single extent (EXT4_EXT_MAX_LEN_WRITTEN or
+ * EXT4_EXT_MAX_LEN_UNWRITTEN blocks).
+ * @param ext the first extent
+ * @param ext2 the second extent
+ * @return true if the two extents can be merged, otherwise false */
+static bool ext4_extent_can_append(struct ext4_extent *ext,
+				   struct ext4_extent *ext2)
+{
+	ext4_lblk_t head_iblock, head_len;
+	ext4_lblk_t tail_iblock, tail_len;
+	ext4_fsblk_t head_fblock, tail_fblock;
+	ext4_lblk_t merged_len;
+
+	/* Geometry of the first ("head") extent */
+	head_iblock = ext4_extent_get_iblock(ext);
+	head_len = ext4_extent_get_nblocks(ext);
+	head_fblock = ext4_extent_get_fblock(ext);
+
+	/* Geometry of the second ("tail") extent */
+	tail_iblock = ext4_extent_get_iblock(ext2);
+	tail_len = ext4_extent_get_nblocks(ext2);
+	tail_fblock = ext4_extent_get_fblock(ext2);
+
+	/* A written extent and an unwritten extent never merge */
+	if (EXT4_EXT_IS_UNWRITTEN(ext) != EXT4_EXT_IS_UNWRITTEN(ext2))
+		return false;
+
+	/* The tail has to start at the logical block that follows
+	 * the last logical block of the head ... */
+	if (head_iblock + head_len != tail_iblock)
+		return false;
+
+	/* ... and at the physical block that follows the last
+	 * physical block of the head */
+	if (head_fblock + head_len != tail_fblock)
+		return false;
+
+	/* The merged extent must not be longer than a single extent
+	 * may be.
+	 * Note: the maximum length of unwritten extent is shorter than
+	 * written extent by one block */
+	merged_len = head_len + tail_len;
+	if (EXT4_EXT_IS_UNWRITTEN(ext))
+		return merged_len <= EXT4_EXT_MAX_LEN_UNWRITTEN;
+
+	return merged_len <= EXT4_EXT_MAX_LEN_WRITTEN;
+}
+
+/**@brief Check if the second extent can be prepended to the first extent
+ *
+ * Prepending is possible only when both extents share the same
+ * written/unwritten state, \p ext2 ends exactly where \p ext begins
+ * (both logically and physically) and the combined length still
+ * fits into a single extent (EXT4_EXT_MAX_LEN_WRITTEN or
+ * EXT4_EXT_MAX_LEN_UNWRITTEN blocks).
+ * @param ext the first extent
+ * @param ext2 the second extent
+ * @return true if the two extents can be merged, otherwise false */
+static bool ext4_extent_can_prepend(struct ext4_extent *ext,
+				    struct ext4_extent *ext2)
+{
+	ext4_lblk_t cur_iblock, cur_len;
+	ext4_lblk_t lead_iblock, lead_len;
+	ext4_fsblk_t cur_fblock, lead_fblock;
+	ext4_lblk_t merged_len;
+
+	/* Geometry of the first (current) extent */
+	cur_iblock = ext4_extent_get_iblock(ext);
+	cur_len = ext4_extent_get_nblocks(ext);
+	cur_fblock = ext4_extent_get_fblock(ext);
+
+	/* Geometry of the second ("leading") extent */
+	lead_iblock = ext4_extent_get_iblock(ext2);
+	lead_len = ext4_extent_get_nblocks(ext2);
+	lead_fblock = ext4_extent_get_fblock(ext2);
+
+	/* A written extent and an unwritten extent never merge */
+	if (EXT4_EXT_IS_UNWRITTEN(ext) != EXT4_EXT_IS_UNWRITTEN(ext2))
+		return false;
+
+	/* The leading extent has to end at the logical block right
+	 * before the first logical block of the current extent ... */
+	if (lead_iblock + lead_len != cur_iblock)
+		return false;
+
+	/* ... and at the physical block right before the first
+	 * physical block of the current extent */
+	if (lead_fblock + lead_len != cur_fblock)
+		return false;
+
+	/* The merged extent must not be longer than a single extent
+	 * may be.
+	 * Note: the maximum length of unwritten extent is shorter than
+	 * written extent by one block */
+	merged_len = cur_len + lead_len;
+	if (EXT4_EXT_IS_UNWRITTEN(ext))
+		return merged_len <= EXT4_EXT_MAX_LEN_UNWRITTEN;
+
+	return merged_len <= EXT4_EXT_MAX_LEN_WRITTEN;
+}
+
+/**@brief Allocate a physically contiguous run of at most \p nblocks blocks
+ * @param inode_ref I-node
+ * @param goal physical block allocation hint
+ * @param nblocks number of blocks to be allocated
+ * @param fblockp Output value - starting physical block number
+ * @param nblocksp Output value (may be NULL) - number of blocks really allocated
+ * @return Error code; the output values are written only on success */
+static int
+ext4_extent_alloc_datablocks(struct ext4_inode_ref *inode_ref,
+			     ext4_fsblk_t goal,
+			     ext4_lblk_t nblocks,
+			     ext4_fsblk_t *fblockp,
+			     ext4_lblk_t *nblocksp)
+{
+	int ret = 0;
+	ext4_lblk_t i;
+	ext4_fsblk_t retfblock = 0;
+	ext4_lblk_t retnblocks = 0;
+
+	for (i = 0; i < nblocks; ++i, ++retnblocks) {
+		bool free = false;
+
+		if (!i) {
+			/* We allocate the first block by using
+			 * ext4_balloc_alloc_block() so that we
+			 * can pass allocation hint to the block
+			 * allocator */
+			ret = ext4_balloc_alloc_block(inode_ref,
+						      goal,
+						      &retfblock);
+			if (ret == 0)
+				free = true;
+		} else {
+			ext4_fsblk_t blockscnt;
+
+			/* Do a check to make sure that we won't look into
+			 * a block number larger than the total number of
+			 * blocks we have on this filesystem */
+			blockscnt = ext4_sb_get_blocks_cnt(&inode_ref->fs->sb);
+			if (retfblock + i < blockscnt) {
+				ret = ext4_balloc_try_alloc_block(inode_ref,
+						retfblock + i, &free);
+			} else
+				free = false;
+		}
+
+		/* Stop on error, on insufficient space, or when the next
+		 * block is taken (the run has to stay contiguous).
+		 * NOTE(review): on error, blocks taken so far stay allocated */
+		if (ret != 0 || !free)
+			break;
+	}
+	/* Report what we really got: it may be fewer blocks than asked for */
+	if (ret == 0) {
+		*fblockp = retfblock;
+		if (nblocksp)
+			*nblocksp = retnblocks;
+	}
+	return ret;
+}
+
+/**@brief Look up, and optionally create, the mapping of a logical block
+ * @param inode_ref I-node
+ * @param iblock starting logical block of the inode
+ * @param max_nblocks maximum number of blocks to get from/allocate to blockmap
+ * @param resfblockp Output value (may be NULL) - physical block address of
+ *        \p iblock, or 0 if a hole is reported
+ * @param create true if caller wants to insert mapping or convert
+ *        unwritten mapping to written one
+ * @param resnblocksp Output value (may be NULL) - number of blocks covered
+ *        by the result; both outputs are written only on success
+ * @return Error code*/
+int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref,
+			   ext4_lblk_t iblock,
+			   ext4_lblk_t max_nblocks,
+			   ext4_fsblk_t *resfblockp,
+			   bool create,
+			   ext4_lblk_t *resnblocksp)
+{
+	int ret;
+	struct ext4_extent_path *path;
+	struct ext4_extent *ext;
+	struct ext4_extent insext;
+	ext4_lblk_t eiblock;
+	ext4_lblk_t enblocks;
+	ext4_fsblk_t efblock;
+	ext4_fsblk_t resfblock;
+	ext4_lblk_t resnblocks = 0;
+	ext4_fsblk_t goal;
+
+	/* Seek to the corresponding extent */
+	ret = ext4_extent_find_extent(inode_ref, iblock, &path);
+	if (ret != 0)
+		return ret;
+
+	ext = path[0].extent;
+	if (ext) {
+		/* The extent tree is not empty */
+		eiblock = ext4_extent_get_iblock(ext);
+		enblocks = ext4_extent_get_nblocks(ext);
+		efblock = ext4_extent_get_fblock(ext);
+		if (EXT4_EXTENT_IN_RANGE(iblock, eiblock, enblocks)) {
+			/* The extent exists and the requested logical block
+			 * falls into the range of the extent */
+			resfblock = efblock + iblock - eiblock;
+			resnblocks = eiblock + enblocks - iblock;
+
+			/* Trim the result if it is larger than the maximum
+			 * length the caller wants */
+			if (resnblocks > max_nblocks)
+				resnblocks = max_nblocks;
+
+			if (EXT4_EXT_IS_UNWRITTEN(ext)) {
+				if (create)
+					/* Convert the requested part of the
+					 * unwritten extent to a written one */
+					ret = ext4_extent_convert_written(inode_ref,
+									  path,
+									  iblock,
+									  resnblocks);
+				else
+					/* We are not asked to modify the blockmap
+					 * so we just return a hole */
+					resfblock = 0;
+			}
+			goto cleanup;
+		}
+		if (!create) {
+			/* Don't waste time on finding the next extent if we
+			 * are not asked to insert mapping, just return a
+			 * hole of one block */
+			resfblock = 0;
+			resnblocks = 1;
+			goto cleanup;
+		}
+		if (ext4_extent_get_iblock(ext) < iblock) {
+			/* The current extent starts below the requested
+			 * logical block, so we peek at the next extent and
+			 * clamp max_nblocks to stay clear of its start.
+			 * NOTE(review): no clamp is applied when the current
+			 * extent starts above iblock - confirm this is safe */
+			bool nonext;
+
+			/* Go to the next extent */
+			ret = ext4_extent_increment(inode_ref, path, &nonext);
+			if (ret != 0)
+				goto cleanup;
+
+			if (!nonext) {
+				/* We successfully reach the next extent */
+				bool noprev;
+				ext4_lblk_t neiblock;
+
+				ext = path[0].extent;
+
+				/* The next extent must start at greater logical
+				 * block number */
+				assert(ext4_extent_get_iblock(ext) >
+					   iblock);
+
+				/* Calculate the maximum number of blocks we
+				 * can allocate without overlapping with the
+				 * next extent */
+				neiblock = ext4_extent_get_iblock(ext);
+				if (max_nblocks > neiblock - iblock)
+					max_nblocks = neiblock - iblock;
+
+				/* Go back to the previous extent */
+				ret = ext4_extent_decrement(inode_ref, path,
+							    &noprev);
+				if (ret != 0)
+					goto cleanup;
+				assert(!noprev);
+				ext = path[0].extent;
+			}
+		}
+	}
+
+	/* Empty tree: return a hole if we are not asked to insert mapping */
+	if (!create) {
+		resfblock = 0;
+		resnblocks = 1;
+		goto cleanup;
+	}
+
+	/* Allocate data blocks; max_nblocks is overwritten with the result */
+	goal = ext4_extent_data_alloc_goal(inode_ref, path, iblock);
+	ret = ext4_extent_alloc_datablocks(inode_ref, goal, max_nblocks,
+					   &resfblock, &max_nblocks);
+	if (ret != 0)
+		goto cleanup;
+
+	ext4_extent_set_iblock(&insext, iblock);
+	ext4_extent_set_nblocks(&insext, max_nblocks, false);
+	ext4_extent_set_fblock(&insext, resfblock);
+
+	if (ext && ext4_extent_can_append(ext, &insext)) {
+		/* Clang won't complain, it's just to make gcc happy */
+		enblocks = ext4_extent_get_nblocks(ext);
+
+		/* If we can append this extent to the current extent */
+		ext4_extent_set_nblocks(ext, enblocks + max_nblocks,
+					EXT4_EXT_IS_UNWRITTEN(ext));
+
+		ext4_extent_path_dirty(inode_ref, path, 0);
+	} else if (ext && ext4_extent_can_prepend(ext, &insext)) {
+		/* Clang won't complain, it's just to make gcc happy */
+		enblocks = ext4_extent_get_nblocks(ext);
+
+		/* If we can prepend this extent to the current extent */
+		ext4_extent_set_iblock(ext, iblock);
+		ext4_extent_set_nblocks(ext, enblocks + max_nblocks,
+					EXT4_EXT_IS_UNWRITTEN(ext));
+		ext4_extent_set_fblock(ext, resfblock);
+
+		/* If we are working on the first extent in the
+		 * first leaf (in case we are actually prepending
+		 * mappings) we need to update the index of nodes.
+		 *
+		 * NOTE: Since we don't seek to the next extent and
+		 * try to modify it, prepending should not happen at
+		 * any leaves except the first extent of the first leaf */
+		ext4_extent_update_index(inode_ref, path, false);
+		ext4_extent_path_dirty(inode_ref, path, 0);
+	} else {
+		/* Finally, insert a new extent; give the blocks back on failure */
+		ret = ext4_extent_insert(inode_ref, path, &insext);
+		if (ret != 0)
+			ext4_balloc_free_blocks(inode_ref, resfblock,
+						max_nblocks);
+	}
+
+	resnblocks = max_nblocks;
+
+cleanup:
+	/* Put loaded blocks */
+	ext4_extent_path_release(inode_ref, path);
+
+	/* Destroy temporary data structure */
+	ext4_free(path);
+
+	if (ret == 0) {
+		if (resfblockp)
+			*resfblockp = resfblock;
+		if (resnblocksp)
+			*resnblocksp = resnblocks;
+	}
+
+	return ret;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_fs.c
@@ -1,0 +1,1699 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_trans.h"
+#include "ext4_fs.h"
+#include "ext4_blockdev.h"
+#include "ext4_super.h"
+#include "ext4_crc32.h"
+#include "ext4_block_group.h"
+#include "ext4_balloc.h"
+#include "ext4_bitmap.h"
+#include "ext4_inode.h"
+#include "ext4_ialloc.h"
+#include "ext4_extent.h"
+
+/**@brief Read and check the superblock, then prepare \p fs for use.
+ * @param fs Filesystem to initialize
+ * @param bdev Block device holding the file system
+ * @param read_only true to keep this function from writing the superblock
+ * @return 0 on success, otherwise an error (the error string may be set) */
+int ext4_fs_init(struct ext4_fs *fs, struct ext4_blockdev *bdev,
+		 bool read_only)
+{
+	int r, i;
+	u16int tmp;
+	u32int bsize, blocks_id;
+
+	assert(fs && bdev);
+	fs->bdev = bdev;
+	fs->read_only = read_only;
+
+	r = ext4_sb_read(fs->bdev, &fs->sb);
+	if (r != 0)
+		return r;
+
+	if (!ext4_sb_check(&fs->sb)) {
+		werrstr("superblock: %r");
+		return -1;
+	}
+	/* bsize is unsigned, so it is printed with %ud rather than %d */
+	bsize = ext4_sb_get_block_size(&fs->sb);
+	if (bsize > EXT4_MAX_BLOCK_SIZE) {
+		werrstr("invalid block size: %ud", bsize);
+		return -1;
+	}
+
+	/* The feature check may demand read-only; it never lifts it */
+	r = ext4_fs_check_features(fs, &read_only);
+	if (r != 0)
+		return r;
+	if (read_only)
+		fs->read_only = read_only;
+
+	/* Compute limits for indirect block levels */
+	blocks_id = bsize / sizeof(u32int);
+	fs->inode_block_limits[0] = EXT4_INODE_DIRECT_BLOCK_COUNT;
+	fs->inode_blocks_per_level[0] = 1;
+	for (i = 1; i < 4; i++) {
+		fs->inode_blocks_per_level[i] =
+		    fs->inode_blocks_per_level[i - 1] * blocks_id;
+		fs->inode_block_limits[i] = fs->inode_block_limits[i - 1] +
+					    fs->inode_blocks_per_level[i];
+	}
+
+	/* Only warn if the error flag is already set in the superblock */
+	tmp = ext4_get16(&fs->sb, state);
+	if (tmp & EXT4_SUPERBLOCK_STATE_ERROR_FS)
+		ext4_dbg(DEBUG_FS, DBG_WARN
+			 "last umount error: superblock fs_error flag\n");
+
+	if (!fs->read_only) {
+		/* Mark system as mounted */
+		ext4_set16(&fs->sb, state, EXT4_SUPERBLOCK_STATE_ERROR_FS);
+		r = ext4_sb_write(fs->bdev, &fs->sb);
+		if (r != 0)
+			return r;
+
+		/* Update mount count (in memory only at this point) */
+		ext4_set16(&fs->sb, mount_count, ext4_get16(&fs->sb, mount_count) + 1);
+	}
+
+	return r;
+}
+
+/**@brief Mark the superblock state valid and write it unless read-only.
+ * @return 0 when read-only, otherwise the result of ext4_sb_write() */
+int ext4_fs_fini(struct ext4_fs *fs)
+{
+	assert(fs);
+
+	/* The in-memory state is updated even when nothing gets written */
+	ext4_set16(&fs->sb, state, EXT4_SUPERBLOCK_STATE_VALID_FS);
+	if (fs->read_only)
+		return 0;
+	return ext4_sb_write(fs->bdev, &fs->sb);
+}
+
+static void ext4_fs_debug_features_inc(u32int features_incompatible)
+{	/* Print the name of every known incompatible feature bit that is set */
+	if (features_incompatible & EXT4_FINCOM_COMPRESSION)
+		ext4_dbg(DEBUG_FS, DBG_NONE "compression\n");
+	if (features_incompatible & EXT4_FINCOM_FILETYPE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "filetype\n");
+	if (features_incompatible & EXT4_FINCOM_RECOVER)
+		ext4_dbg(DEBUG_FS, DBG_NONE "recover\n");
+	if (features_incompatible & EXT4_FINCOM_JOURNAL_DEV)
+		ext4_dbg(DEBUG_FS, DBG_NONE "journal_dev\n");
+	if (features_incompatible & EXT4_FINCOM_META_BG)
+		ext4_dbg(DEBUG_FS, DBG_NONE "meta_bg\n");
+	if (features_incompatible & EXT4_FINCOM_EXTENTS)
+		ext4_dbg(DEBUG_FS, DBG_NONE "extents\n");
+	if (features_incompatible & EXT4_FINCOM_64BIT)
+		ext4_dbg(DEBUG_FS, DBG_NONE "64bit\n");
+	if (features_incompatible & EXT4_FINCOM_MMP)
+		ext4_dbg(DEBUG_FS, DBG_NONE "mmp\n");	/* name matches the MMP flag */
+	if (features_incompatible & EXT4_FINCOM_FLEX_BG)
+		ext4_dbg(DEBUG_FS, DBG_NONE "flex_bg\n");
+	if (features_incompatible & EXT4_FINCOM_EA_INODE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "ea_inode\n");
+	if (features_incompatible & EXT4_FINCOM_DIRDATA)
+		ext4_dbg(DEBUG_FS, DBG_NONE "dirdata\n");
+	if (features_incompatible & EXT4_FINCOM_BG_USE_META_CSUM)
+		ext4_dbg(DEBUG_FS, DBG_NONE "meta_csum\n");
+	if (features_incompatible & EXT4_FINCOM_LARGEDIR)
+		ext4_dbg(DEBUG_FS, DBG_NONE "largedir\n");
+	if (features_incompatible & EXT4_FINCOM_INLINE_DATA)
+		ext4_dbg(DEBUG_FS, DBG_NONE "inline_data\n");
+}
+static void ext4_fs_debug_features_comp(u32int features_compatible)
+{	/* Print the name of every known compatible feature bit that is set */
+	if (features_compatible & EXT4_FCOM_DIR_PREALLOC)
+		ext4_dbg(DEBUG_FS, DBG_NONE "dir_prealloc\n");
+	if (features_compatible & EXT4_FCOM_IMAGIC_INODES)
+		ext4_dbg(DEBUG_FS, DBG_NONE "imagic_inodes\n");
+	if (features_compatible & EXT4_FCOM_HAS_JOURNAL)
+		ext4_dbg(DEBUG_FS, DBG_NONE "has_journal\n");
+	if (features_compatible & EXT4_FCOM_EXT_ATTR)
+		ext4_dbg(DEBUG_FS, DBG_NONE "ext_attr\n");
+	if (features_compatible & EXT4_FCOM_RESIZE_INODE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "resize_inode\n");
+	if (features_compatible & EXT4_FCOM_DIR_INDEX)
+		ext4_dbg(DEBUG_FS, DBG_NONE "dir_index\n");
+}	/* bits without a name above are not reported */
+
+static void ext4_fs_debug_features_ro(u32int features_ro)
+{	/* Print the name of every known read-only compatible feature bit set */
+	if (features_ro & EXT4_FRO_COM_SPARSE_SUPER)
+		ext4_dbg(DEBUG_FS, DBG_NONE "sparse_super\n");
+	if (features_ro & EXT4_FRO_COM_LARGE_FILE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "large_file\n");
+	if (features_ro & EXT4_FRO_COM_BTREE_DIR)
+		ext4_dbg(DEBUG_FS, DBG_NONE "btree_dir\n");
+	if (features_ro & EXT4_FRO_COM_HUGE_FILE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "huge_file\n");
+	if (features_ro & EXT4_FRO_COM_GDT_CSUM)
+		ext4_dbg(DEBUG_FS, DBG_NONE "gdt_csum\n");	/* name matches the GDT_CSUM flag */
+	if (features_ro & EXT4_FRO_COM_DIR_NLINK)
+		ext4_dbg(DEBUG_FS, DBG_NONE "dir_nlink\n");
+	if (features_ro & EXT4_FRO_COM_EXTRA_ISIZE)
+		ext4_dbg(DEBUG_FS, DBG_NONE "extra_isize\n");
+	if (features_ro & EXT4_FRO_COM_QUOTA)
+		ext4_dbg(DEBUG_FS, DBG_NONE "quota\n");
+	if (features_ro & EXT4_FRO_COM_BIGALLOC)
+		ext4_dbg(DEBUG_FS, DBG_NONE "bigalloc\n");
+	if (features_ro & EXT4_FRO_COM_METADATA_CSUM)
+		ext4_dbg(DEBUG_FS, DBG_NONE "metadata_csum\n");
+}
+
+/**@brief Check the feature flags in the superblock of \p fs.
+ * @param read_only Output value - true if the fs may only be read
+ * @return 0 on success, -1 on unsupported incompatible features */
+int ext4_fs_check_features(struct ext4_fs *fs, bool *read_only)
+{
+	u32int unsupported;
+
+	assert(fs && read_only);
+	/* Revision 0 superblock: nothing to check */
+	if (ext4_get32(&fs->sb, rev_level) == 0) {
+		*read_only = false;
+		return 0;
+	}
+
+	ext4_dbg(DEBUG_FS, DBG_INFO "sblock features_incompatible:\n");
+	ext4_fs_debug_features_inc(ext4_get32(&fs->sb, features_incompatible));
+	ext4_dbg(DEBUG_FS, DBG_INFO "sblock features_compatible:\n");
+	ext4_fs_debug_features_comp(ext4_get32(&fs->sb, features_compatible));
+	ext4_dbg(DEBUG_FS, DBG_INFO "sblock features_read_only:\n");
+	ext4_fs_debug_features_ro(ext4_get32(&fs->sb, features_read_only));
+
+	/* An incompatible feature we neither support nor ignore is fatal */
+	unsupported = ext4_get32(&fs->sb, features_incompatible);
+	unsupported &= ~(EXT4_SUPPORTED_FINCOM | EXT_FINCOM_IGNORED);
+	if (unsupported != 0) {
+		ext4_dbg(DEBUG_FS, DBG_ERROR
+			 "sblock has unsupported features incompatible:\n");
+		ext4_fs_debug_features_inc(unsupported);
+		werrstr("unsupported features");
+		return -1;
+	}
+
+	/* An unsupported read-only compatible feature only forbids writing */
+	unsupported = ext4_get32(&fs->sb, features_read_only);
+	unsupported &= ~EXT4_SUPPORTED_FRO_COM;
+	*read_only = unsupported != 0;
+	if (*read_only) {
+		ext4_dbg(DEBUG_FS, DBG_WARN
+			 "sblock has unsupported features read only:\n");
+		ext4_fs_debug_features_ro(unsupported);
+	}
+	return 0;
+}
+
+/**@brief Determine whether the block is inside the group.
+ * @param s superblock
+ * @param baddr block address
+ * @param bgid block group id
+ * @return true if \p baddr lies in block group \p bgid, otherwise false
+ */
+static bool ext4_block_in_group(struct ext4_sblock *s, ext4_fsblk_t baddr,
+				u32int bgid)
+{
+	u32int owner = ext4_balloc_get_bgid_of_block(s, baddr);
+
+	/* The block is inside iff its owning group is the one asked about */
+	return owner == bgid;
+}
+
+/**@brief Set the trailing bits of a bitmap. Bits from \p start_bit up to
+ * the next multiple of 8 are set one by one (keeps the bit order right);
+ * the following (end_bit - bit) / 8 whole bytes are filled by memset. */
+static void ext4_fs_mark_bitmap_end(int start_bit, int end_bit, void *bitmap)
+{
+	unsigned long byte_edge;
+	int bit;
+
+	if (start_bit >= end_bit)
+		return;
+
+	byte_edge = (start_bit + 7) & ~7UL;
+	for (bit = start_bit; (unsigned)bit < byte_edge; bit++)
+		ext4_bmap_bit_set(bitmap, bit);
+	if (bit < end_bit)
+		memset((char *)bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
+}
+
+/**@brief Initialize block bitmap in block group: clear it, then set the
+ *        bits of the group's metadata blocks and the bits past its end.
+ * @param bg_ref Reference to block group
+ * @return Error code */
+static int ext4_fs_init_block_bitmap(struct ext4_block_group_ref *bg_ref)
+{
+	struct ext4_sblock *sb = &bg_ref->fs->sb;
+	struct ext4_bgroup *bg = bg_ref->block_group;
+	int rc;
+
+	u32int bit, bit_max;
+	u32int group_blocks;
+	u16int inode_size = ext4_get16(sb, inode_size);
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u32int inodes_per_group = ext4_get32(sb, inodes_per_group);
+
+	ext4_fsblk_t i;
+	ext4_fsblk_t bmp_blk = ext4_bg_get_block_bitmap(bg, sb);
+	ext4_fsblk_t bmp_inode = ext4_bg_get_inode_bitmap(bg, sb);
+	ext4_fsblk_t inode_table = ext4_bg_get_inode_table_first_block(bg, sb);
+	ext4_fsblk_t first_bg = ext4_balloc_get_block_of_bgid(sb, bg_ref->index);
+
+	u32int dsc_per_block = block_size / ext4_sb_get_desc_size(sb);
+
+	bool flex_bg = ext4_sb_feature_incom(sb, EXT4_FINCOM_FLEX_BG);
+	bool meta_bg = ext4_sb_feature_incom(sb, EXT4_FINCOM_META_BG);
+
+	u32int inode_table_bcnt = inodes_per_group * inode_size / block_size;
+
+	struct ext4_block block_bitmap;
+	rc = ext4_trans_block_get_noread(bg_ref->fs->bdev, &block_bitmap, bmp_blk);
+	if (rc != 0)
+		return rc;
+	/* Start from an all-zero bitmap; bit_max counts the leading bits to set */
+	memset(block_bitmap.data, 0, block_size);
+	bit_max = ext4_sb_is_super_in_bg(sb, bg_ref->index);
+
+	u32int count = ext4_sb_first_meta_bg(sb) * dsc_per_block;
+	if (!meta_bg || bg_ref->index < count) {
+		if (bit_max) {
+			bit_max += ext4_bg_num_gdb(sb, bg_ref->index);
+			bit_max += ext4_get16(sb, s_reserved_gdt_blocks);
+		}
+	} else { /* For META_BG_BLOCK_GROUPS */
+		bit_max += ext4_bg_num_gdb(sb, bg_ref->index);
+	}
+	for (bit = 0; bit < bit_max; bit++)
+		ext4_bmap_bit_set(block_bitmap.data, bit);
+
+	if (bg_ref->index == ext4_block_group_cnt(sb) - 1) {
+		/*
+		 * Even though mke2fs always initializes the first and last
+		 * group, if some other tool enabled EXT4_BG_BLOCK_UNINIT we
+		 * need to make sure we calculate the right free blocks
+		 */
+
+		group_blocks = (u32int)(ext4_sb_get_blocks_cnt(sb) -
+					  ext4_get32(sb, first_data_block) -
+					  ext4_get32(sb, blocks_per_group) *
+					  (ext4_block_group_cnt(sb) - 1));
+	} else {
+		group_blocks = ext4_get32(sb, blocks_per_group);
+	}
+	/* Bitmaps and i-node table: with flex_bg only if they lie in this group */
+	bool in_bg;
+	in_bg = ext4_block_in_group(sb, bmp_blk, bg_ref->index);
+	if (!flex_bg || in_bg)
+		ext4_bmap_bit_set(block_bitmap.data,
+				  (u32int)(bmp_blk - first_bg));
+
+	in_bg = ext4_block_in_group(sb, bmp_inode, bg_ref->index);
+	if (!flex_bg || in_bg)
+		ext4_bmap_bit_set(block_bitmap.data,
+				  (u32int)(bmp_inode - first_bg));
+
+	for (i = inode_table; i < inode_table + inode_table_bcnt; i++) {
+		in_bg = ext4_block_in_group(sb, i, bg_ref->index);
+		if (!flex_bg || in_bg)
+			ext4_bmap_bit_set(block_bitmap.data,
+					  (u32int)(i - first_bg));
+	}
+	/*
+	 * Also if the number of blocks within the group is
+	 * less than the blocksize * 8 ( which is the size
+	 * of bitmap ), set rest of the block bitmap to 1
+	 */
+	ext4_fs_mark_bitmap_end(group_blocks, block_size * 8, block_bitmap.data);
+	ext4_trans_set_block_dirty(block_bitmap.buf);
+	/* The checksum call is handed the descriptor, so flag the group dirty */
+	ext4_balloc_set_bitmap_csum(sb, bg_ref->block_group, block_bitmap.data);
+	bg_ref->dirty = true;
+
+	/* Save bitmap */
+	return ext4_block_set(bg_ref->fs->bdev, &block_bitmap);
+}
+
+/**@brief Initialize i-node bitmap in block group.
+ *
+ * The bits of the i-nodes belonging to the group are cleared; the bits
+ * past them, up to the end of the bitmap block, are set.
+ * @param bg_ref Reference to block group
+ * @return Error code
+ */
+static int ext4_fs_init_inode_bitmap(struct ext4_block_group_ref *bg_ref)
+{
+	struct ext4_sblock *sb = &bg_ref->fs->sb;
+	struct ext4_bgroup *bg = bg_ref->block_group;
+	u32int block_size = ext4_sb_get_block_size(sb);
+	u32int inodes_per_group = ext4_get32(sb, inodes_per_group);
+	u32int bitmap_bits = block_size * 8;
+	ext4_fsblk_t bitmap_addr = ext4_bg_get_inode_bitmap(bg, sb);
+	struct ext4_block bitmap;
+	u32int bit;
+	int rc;
+
+	/* Load bitmap */
+	rc = ext4_trans_block_get_noread(bg_ref->fs->bdev, &bitmap, bitmap_addr);
+	if (rc != 0)
+		return rc;
+
+	/* Clear the bits of the i-nodes that belong to the group */
+	memset(bitmap.data, 0, (inodes_per_group + 7) / 8);
+
+	/* Set everything past the last i-node: bit by bit up to the
+	 * next byte boundary, then the remaining whole bytes at once */
+	for (bit = inodes_per_group; bit < ((inodes_per_group + 7) & ~7UL); bit++)
+		ext4_bmap_bit_set(bitmap.data, bit);
+	if (bit < bitmap_bits)
+		memset(bitmap.data + (bit >> 3), 0xff, (bitmap_bits - bit) >> 3);
+
+	ext4_trans_set_block_dirty(bitmap.buf);
+	/* The checksum call is handed the descriptor; flag the group dirty */
+	ext4_ialloc_set_bitmap_csum(sb, bg, bitmap.data);
+	bg_ref->dirty = true;
+
+	/* Save bitmap */
+	return ext4_block_set(bg_ref->fs->bdev, &bitmap);
+}
+
+/**@brief Initialize i-node table in block group.
+ * @param bg_ref Reference to block group
+ * @return Error code
+ */
+static int ext4_fs_init_inode_table(struct ext4_block_group_ref *bg_ref)
+{
+	struct ext4_sblock *sb = &bg_ref->fs->sb;
+	struct ext4_bgroup *bg = bg_ref->block_group;
+	struct ext4_block blk;
+	ext4_fsblk_t cur, first, last;
+	u32int inode_size, block_size, per_block, in_group, nblocks;
+	int rc;
+
+	inode_size = ext4_get16(sb, inode_size);
+	block_size = ext4_sb_get_block_size(sb);
+	per_block = block_size / inode_size;
+	in_group = ext4_inodes_in_group_cnt(sb, bg_ref->index);
+
+	/* Number of table blocks, rounded up so every i-node is covered */
+	nblocks = in_group / per_block;
+	if (in_group % per_block != 0)
+		nblocks++;
+
+	/* Compute initialization bounds */
+	first = ext4_bg_get_inode_table_first_block(bg, sb);
+	last = first + nblocks - 1;
+
+	/* Zero every block of the table and put it back marked dirty */
+	for (cur = first; cur <= last; cur++) {
+		rc = ext4_trans_block_get_noread(bg_ref->fs->bdev, &blk, cur);
+		if (rc != 0)
+			return rc;
+
+		memset(blk.data, 0, block_size);
+		ext4_trans_set_block_dirty(blk.buf);
+		rc = ext4_block_set(bg_ref->fs->bdev, &blk);
+		if (rc != 0)
+			return rc;
+	}
+	return 0;
+}
+
+/**@brief Locate the block holding the descriptor of block group \p bgid.
+ * @param dsc_per_block number of descriptors that fit in one block
+ * @return address of the descriptor block */
+static ext4_fsblk_t ext4_fs_get_descriptor_block(struct ext4_sblock *s,
+						 u32int bgid,
+						 u32int dsc_per_block)
+{
+	u32int dsc_id = bgid / dsc_per_block;
+	u32int first_meta_bg = ext4_sb_first_meta_bg(s);
+	int has_super;
+
+	/* Without META_BG, or below the first meta group, descriptor
+	 * block dsc_id sits dsc_id + 1 blocks after the first data block */
+	if (!ext4_sb_feature_incom(s, EXT4_FINCOM_META_BG) ||
+	    dsc_id < first_meta_bg)
+		return ext4_get32(s, first_data_block) + dsc_id + 1;
+	has_super = ext4_sb_is_super_in_bg(s, bgid) ? 1 : 0;
+	return has_super + ext4_fs_first_bg_block_no(s, bgid);
+}
+
+/**@brief Compute checksum of block group descriptor.
+ * @param sb Superblock
+ * @param bgid Index of block group in the filesystem
+ * @param bg Block group to compute checksum for
+ * @return Checksum value; 0 if neither checksum feature is enabled
+ */
+static u16int ext4_fs_bg_checksum(struct ext4_sblock *sb, u32int bgid,
+				    struct ext4_bgroup *bg)
+{
+	/* If checksum not supported, 0 will be returned */
+	u16int crc = 0;
+
+	/* metadata_csum takes precedence over gdt_csum */
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+		/* crc32c based variant; only the low 16 bits are kept */
+		u32int le32_bgid = to_le32(bgid);
+		u32int orig_checksum, checksum;
+
+		/* Preparation: temporarily set bg checksum to 0 */
+		orig_checksum = bg->checksum;
+		bg->checksum = 0;
+
+		/* First calculate crc32 checksum against fs uuid */
+		checksum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid,
+				sizeof(sb->uuid));
+		/* Then calculate crc32 checksum against bgid */
+		checksum = ext4_crc32c(checksum, &le32_bgid, sizeof(bgid));
+		/* Finally calculate crc32 checksum against block_group_desc */
+		checksum = ext4_crc32c(checksum, bg, ext4_sb_get_desc_size(sb));
+		bg->checksum = orig_checksum;
+
+		crc = checksum & 0xFFFF;
+		return crc;
+	}
+
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_GDT_CSUM)) {
+		u8int *base = (u8int *)bg;
+		u8int *checksum = (u8int *)&bg->checksum;
+
+		u32int offset = (u32int)(checksum - base);
+
+		/* Convert block group index to little endian */
+		u32int group = to_le32(bgid);
+
+		/* Initialization */
+		crc = ext4_bg_crc16(~0, sb->uuid, sizeof(sb->uuid));
+
+		/* Include index of block group */
+		crc = ext4_bg_crc16(crc, (u8int *)&group, sizeof(group));
+
+		/* Compute crc from the first part (stop before checksum field)
+		 */
+		crc = ext4_bg_crc16(crc, (u8int *)bg, offset);
+
+		/* Skip checksum */
+		offset += sizeof(bg->checksum);
+
+		/* Rest of the descriptor: only with 64bit and a longer descriptor */
+		if ((ext4_sb_feature_incom(sb, EXT4_FINCOM_64BIT)) &&
+		    (offset < ext4_sb_get_desc_size(sb))) {
+
+			const u8int *start = ((u8int *)bg) + offset;
+			usize len = ext4_sb_get_desc_size(sb) - offset;
+			crc = ext4_bg_crc16(crc, start, len);
+		}
+	}
+	return crc;
+}
+
+/* True when the checksum stored in the descriptor equals the computed
+ * one (compared against to_le16() of the stored field); always true on
+ * file systems without metadata_csum. */
+static bool ext4_fs_verify_bg_csum(struct ext4_sblock *sb, u32int bgid,
+				   struct ext4_bgroup *bg)
+{
+	return !ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM) ||
+	       ext4_fs_bg_checksum(sb, bgid, bg) == to_le16(bg->checksum);
+}
+
+/**@brief Load the descriptor of a block group. Bitmaps (and the i-node
+ *        table) still flagged as uninitialized are initialized first.
+ * @param fs Filesystem
+ * @param bgid Index of the block group to load
+ * @param ref Output value - reference to the block group
+ * @return Error code; on failure the descriptor block is put back */
+int ext4_fs_get_block_group_ref(struct ext4_fs *fs, u32int bgid,
+				struct ext4_block_group_ref *ref)
+{
+	struct ext4_sblock *sb = &fs->sb;
+	struct ext4_bgroup *bg;
+	u32int desc_size = ext4_sb_get_desc_size(sb);
+	u32int dsc_cnt = ext4_sb_get_block_size(sb) / desc_size;
+	u64int block_id = ext4_fs_get_descriptor_block(sb, bgid, dsc_cnt);
+	int rc;
+
+	rc = ext4_trans_block_get(fs->bdev, &ref->block, block_id);
+	if (rc != 0)
+		return rc;
+
+	/* dsc_cnt descriptors fit in a block; point at the one of bgid */
+	bg = (void *)(ref->block.data + (bgid % dsc_cnt) * desc_size);
+	ref->block_group = bg;
+	ref->fs = fs;
+	ref->index = bgid;
+	ref->dirty = false;
+
+	if (!ext4_fs_verify_bg_csum(sb, bgid, bg)) {
+		ext4_dbg(DEBUG_FS,
+			 DBG_WARN "Block group descriptor checksum failed."
+			 "Block group index: %ud\n",
+			 bgid);
+	}
+
+	if (ext4_bg_has_flag(bg, EXT4_BLOCK_GROUP_BLOCK_UNINIT)) {
+		rc = ext4_fs_init_block_bitmap(ref);
+		if (rc != 0)
+			goto fail;
+		ext4_bg_clear_flag(bg, EXT4_BLOCK_GROUP_BLOCK_UNINIT);
+		ref->dirty = true;
+	}
+
+	if (!ext4_bg_has_flag(bg, EXT4_BLOCK_GROUP_INODE_UNINIT))
+		return 0;
+
+	rc = ext4_fs_init_inode_bitmap(ref);
+	if (rc != 0)
+		goto fail;
+	ext4_bg_clear_flag(bg, EXT4_BLOCK_GROUP_INODE_UNINIT);
+
+	if (!ext4_bg_has_flag(bg, EXT4_BLOCK_GROUP_ITABLE_ZEROED)) {
+		rc = ext4_fs_init_inode_table(ref);
+		if (rc != 0)
+			goto fail;
+		ext4_bg_set_flag(bg, EXT4_BLOCK_GROUP_ITABLE_ZEROED);
+	}
+	ref->dirty = true;
+	return 0;
+
+fail:
+	/* Put the descriptor block back and report the original error */
+	ext4_block_set(fs->bdev, &ref->block);
+	return rc;
+}
+
+/**@brief Put back a block group reference. If it was modified, the
+ *        descriptor checksum is recomputed and the block marked dirty.
+ * @param ref Reference to block group
+ * @return Error code */
+int ext4_fs_put_block_group_ref(struct ext4_block_group_ref *ref)
+{
+	struct ext4_bgroup *bg = ref->block_group;
+
+	if (ref->dirty) {
+		/* Compute new checksum of block group */
+		bg->checksum = to_le16(ext4_fs_bg_checksum(&ref->fs->sb,
+							   ref->index, bg));
+		ext4_trans_set_block_dirty(ref->block.buf);
+	}
+	/* Put back block, that contains block group descriptor */
+	return ext4_block_set(ref->fs->bdev, &ref->block);
+}
+
+/**@brief Compute the metadata_csum checksum of an i-node.
+ * @param inode_ref I-node to checksum
+ * @return crc32c over fs uuid, i-node number, generation and the
+ *         i-node itself (with its checksum field zeroed); 0 if the
+ *         file system lacks metadata_csum */
+static u32int ext4_fs_inode_checksum(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	struct ext4_inode *inode = inode_ref->inode;
+	u16int inode_size = ext4_get16(sb, inode_size);
+	u32int le_index, le_gen, saved, crc;
+
+	if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		return 0;
+
+	le_index = to_le32(inode_ref->index);
+	le_gen = to_le32(ext4_inode_get_generation(inode));
+
+	/* The i-node checksum field is zeroed while it is being summed */
+	saved = ext4_inode_get_csum(sb, inode);
+	ext4_inode_set_csum(sb, inode, 0);
+
+	/* Chain: fs uuid, i-node number, generation, entire i-node */
+	crc = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+	crc = ext4_crc32c(crc, &le_index, sizeof(le_index));
+	crc = ext4_crc32c(crc, &le_gen, sizeof(le_gen));
+	crc = ext4_crc32c(crc, inode, inode_size);
+
+	ext4_inode_set_csum(sb, inode, saved);
+
+	/* If inode size is not large enough to hold the
+	 * upper 16bit of the checksum */
+	if (inode_size == EXT4_GOOD_OLD_INODE_SIZE)
+		crc &= 0xFFFF;
+
+	return crc;
+}
+
+/* Store a freshly computed checksum in the i-node; does nothing on
+ * file systems without metadata_csum. */
+static void ext4_fs_set_inode_checksum(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+		ext4_inode_set_csum(sb, inode_ref->inode,
+				    ext4_fs_inode_checksum(inode_ref));
+}
+
+/* True when the checksum stored in the i-node equals the computed one;
+ * always true on file systems without metadata_csum. */
+static bool ext4_fs_verify_inode_csum(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+	return !ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM) ||
+	       ext4_inode_get_csum(sb, inode_ref->inode) ==
+	       ext4_fs_inode_checksum(inode_ref);
+}
+
+/**@brief Load the block holding an i-node and fill in a reference to it.
+ * @param fs Filesystem
+ * @param index i-node number (1-based)
+ * @param ref Output value - reference to the i-node
+ * @param initialized true to verify the i-node checksum; a mismatch
+ *        is only reported as a debug warning
+ * @return Error code */
+static int
+__ext4_fs_get_inode_ref(struct ext4_fs *fs, u32int index,
+			struct ext4_inode_ref *ref,
+			bool initialized)
+{
+	struct ext4_sblock *sb = &fs->sb;
+	struct ext4_block_group_ref bg_ref;
+	ext4_fsblk_t itable, block_id;
+	u32int per_group, group, slot, byte_off, block_size;
+	u16int inode_size;
+	int rc;
+
+	/* Inode numbers are 1-based, but the group and the slot within
+	 * the group are computed from the 0-based number */
+	per_group = ext4_get32(sb, inodes_per_group);
+	group = (index - 1) / per_group;
+	slot = (index - 1) % per_group;
+
+	/* Load block group, where i-node is located */
+	rc = ext4_fs_get_block_group_ref(fs, group, &bg_ref);
+	if (rc != 0)
+		return rc;
+
+	/* Its descriptor tells where the i-node table starts */
+	itable = ext4_bg_get_inode_table_first_block(bg_ref.block_group, sb);
+
+	/* Put back block group reference (not needed more) */
+	rc = ext4_fs_put_block_group_ref(&bg_ref);
+	if (rc != 0)
+		return rc;
+
+	/* Byte offset of the i-node from the start of the table */
+	inode_size = ext4_get16(sb, inode_size);
+	block_size = ext4_sb_get_block_size(sb);
+	byte_off = slot * inode_size;
+
+	/* Load the table block that contains the i-node */
+	block_id = itable + byte_off / block_size;
+	rc = ext4_trans_block_get(fs->bdev, &ref->block, block_id);
+	if (rc != 0)
+		return rc;
+
+	/* Position of the i-node inside that block */
+	ref->inode = (struct ext4_inode *)(ref->block.data +
+					   byte_off % block_size);
+
+	/* The reference keeps the original (1-based) number */
+	ref->index = index;
+	ref->fs = fs;
+	ref->dirty = false;
+
+	if (initialized && !ext4_fs_verify_inode_csum(ref)) {
+		ext4_dbg(DEBUG_FS,
+			 DBG_WARN "Inode checksum failed."
+			 "Inode: %ud\n",
+			 ref->index);
+	}
+	return 0;
+}
+
+int ext4_fs_get_inode_ref(struct ext4_fs *fs, u32int index,
+ struct ext4_inode_ref *ref)
+{ /* Load i-node `index` into `ref`, passing initialized=true so the checksum is checked */
+ return __ext4_fs_get_inode_ref(fs, index, ref, true);
+}
+
+int ext4_fs_put_inode_ref(struct ext4_inode_ref *ref)
+{
+ /* Release an i-node reference; a modified i-node is checksummed and its block flagged dirty first */
+ if (ref->dirty) {
+ /* Refresh the checksum, then mark the block dirty so the change is written to the device */
+ ext4_fs_set_inode_checksum(ref);
+ ext4_trans_set_block_dirty(ref->block.buf);
+ }
+
+ /* Put back the block that contains the i-node; its result is the return value */
+ return ext4_block_set(ref->fs->bdev, &ref->block);
+}
+
+void ext4_fs_inode_blocks_init(struct ext4_fs *fs,
+			       struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_inode *ino = inode_ref->inode;
+	u32int kind = ext4_inode_type(&fs->sb, ino);
+
+	/*
+	 * Only regular files and directories get their block map
+	 * prepared here; any other i-node type is left exactly as is.
+	 */
+	if (kind != EXT4_INODE_MODE_FILE && kind != EXT4_INODE_MODE_DIRECTORY)
+		return;
+
+	/*
+	 * On a filesystem with the extents feature, flag the i-node as
+	 * extent-mapped and initialize its extent root header.
+	 */
+	if (ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) {
+		ext4_inode_set_flag(ino, EXT4_INODE_FLAG_EXTENTS);
+		ext4_extent_tree_init(inode_ref);
+	}
+
+	/* The i-node counts as modified even when extents are not in use. */
+	inode_ref->dirty = true;
+}
+
+u32int ext4_fs_correspond_inode_mode(int filetype)
+{
+	/* Map a directory-entry file type to the i-node mode type bits. */
+	if (filetype == EXT4_DE_DIR)
+		return EXT4_INODE_MODE_DIRECTORY;
+	if (filetype == EXT4_DE_REG_FILE)
+		return EXT4_INODE_MODE_FILE;
+	if (filetype == EXT4_DE_SYMLINK)
+		return EXT4_INODE_MODE_SOFTLINK;
+	if (filetype == EXT4_DE_CHRDEV)
+		return EXT4_INODE_MODE_CHARDEV;
+	if (filetype == EXT4_DE_BLKDEV)
+		return EXT4_INODE_MODE_BLOCKDEV;
+	if (filetype == EXT4_DE_FIFO)
+		return EXT4_INODE_MODE_FIFO;
+	if (filetype == EXT4_DE_SOCK)
+		return EXT4_INODE_MODE_SOCKET;
+
+	/* FIXME: unsupported filetype; treated as a regular file */
+	return EXT4_INODE_MODE_FILE;
+}
+
+int ext4_fs_alloc_inode(struct ext4_fs *fs, struct ext4_inode_ref *inode_ref,
+ int filetype)
+{
+ /* Allocate an i-node for directory-entry type `filetype`, zero it, set default mode; leaves it in inode_ref (dirty). Returns 0 or a helper's code */
+ bool is_dir;
+ u16int inode_size = ext4_get16(&fs->sb, inode_size);
+
+ is_dir = (filetype == EXT4_DE_DIR);
+
+ /* Allocate inode by allocation algorithm */
+ u32int index;
+ int rc = ext4_ialloc_alloc_inode(fs, &index, is_dir);
+ if (rc != 0)
+ return rc;
+
+ /* Load i-node from on-disk i-node table; initialized=false skips the checksum check */
+ rc = __ext4_fs_get_inode_ref(fs, index, inode_ref, false);
+ if (rc != 0) {
+ ext4_ialloc_free_inode(fs, index, is_dir); /* undo the allocation; its result is ignored */
+ return rc;
+ }
+
+ /* Initialize i-node */
+ struct ext4_inode *inode = inode_ref->inode;
+ /* Clear the whole on-disk i-node (inode_size bytes) before filling it in */
+ memset(inode, 0, inode_size);
+
+ u32int mode;
+ if (is_dir) {
+ /*
+ * Default directory permissions to be compatible with other
+ * systems
+ * 0777 (octal) == rwxrwxrwx
+ */
+
+ mode = 0777;
+ mode |= EXT4_INODE_MODE_DIRECTORY;
+ } else if (filetype == EXT4_DE_SYMLINK) {
+ /*
+ * Default symbolic link permissions to be compatible with other systems
+ * 0777 (octal) == rwxrwxrwx
+ */
+
+ mode = 0777;
+ mode |= EXT4_INODE_MODE_SOFTLINK;
+ } else {
+ /*
+ * Default file permissions to be compatible with other systems
+ * 0666 (octal) == rw-rw-rw-
+ */
+
+ mode = 0666;
+ mode |= ext4_fs_correspond_inode_mode(filetype);
+ }
+ ext4_inode_set_mode(&fs->sb, inode, mode);
+ /* Every remaining field is explicitly set to zero */
+ ext4_inode_set_links_cnt(inode, 0);
+ ext4_inode_set_uid(inode, 0);
+ ext4_inode_set_gid(inode, 0);
+ ext4_inode_set_size(inode, 0);
+ ext4_inode_set_access_time(inode, 0);
+ ext4_inode_set_change_inode_time(inode, 0);
+ ext4_inode_set_modif_time(inode, 0);
+ ext4_inode_set_del_time(inode, 0);
+ ext4_inode_set_blocks_count(&fs->sb, inode, 0);
+ ext4_inode_set_flags(inode, 0);
+ ext4_inode_set_generation(inode, 0);
+ if (inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+ u16int size = ext4_get16(&fs->sb, want_extra_isize);
+ ext4_inode_set_extra_isize(&fs->sb, inode, size);
+ }
+
+ memset(inode->blocks, 0, sizeof(inode->blocks));
+ inode_ref->dirty = true;
+
+ return 0;
+}
+
+int ext4_fs_free_inode(struct ext4_inode_ref *inode_ref)
+{
+ struct ext4_fs *fs = inode_ref->fs;
+ u32int offset;
+ u32int suboff;
+ int rc;
+ /* Free the i-node's indirect (metadata) blocks, its xattr block and the i-node itself; returns 0 or the first failing helper's code */
+ /* Extent-mapped i-nodes have no indirect map to release here */
+ if ((ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) &&
+ (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {
+ /* Data structures are released during truncate operation... */
+ goto finish;
+ }
+
+ /* Release all indirect (no data) blocks */
+
+ /* 1) Single indirect */
+ ext4_fsblk_t fblock = ext4_inode_get_indirect_block(inode_ref->inode, 0);
+ if (fblock != 0) {
+ int rc = ext4_balloc_free_block(inode_ref, fblock);
+ if (rc != 0)
+ return rc;
+
+ ext4_inode_set_indirect_block(inode_ref->inode, 0, 0);
+ }
+
+ u32int block_size = ext4_sb_get_block_size(&fs->sb);
+ u32int count = block_size / sizeof(u32int);
+
+ struct ext4_block block;
+
+ /* 2) Double indirect */
+ fblock = ext4_inode_get_indirect_block(inode_ref->inode, 1);
+ if (fblock != 0) {
+ int rc = ext4_trans_block_get(fs->bdev, &block, fblock);
+ if (rc != 0)
+ return rc;
+ /* Free each block referenced from the double-indirect block, then the block itself */
+ ext4_fsblk_t ind_block;
+ for (offset = 0; offset < count; ++offset) {
+ ind_block = to_le32(((u32int *)block.data)[offset]);
+
+ if (ind_block == 0)
+ continue;
+ rc = ext4_balloc_free_block(inode_ref, ind_block);
+ if (rc != 0) {
+ ext4_block_set(fs->bdev, &block);
+ return rc;
+ }
+
+ }
+
+ ext4_block_set(fs->bdev, &block); /* NOTE(review): result of putting the block back is ignored */
+ rc = ext4_balloc_free_block(inode_ref, fblock);
+ if (rc != 0)
+ return rc;
+
+ ext4_inode_set_indirect_block(inode_ref->inode, 1, 0);
+ }
+
+ /* 3) Triple indirect */
+ struct ext4_block subblock;
+ fblock = ext4_inode_get_indirect_block(inode_ref->inode, 2);
+ if (fblock == 0)
+ goto finish;
+ rc = ext4_trans_block_get(fs->bdev, &block, fblock);
+ if (rc != 0)
+ return rc;
+
+ ext4_fsblk_t ind_block;
+ for (offset = 0; offset < count; ++offset) {
+ ind_block = to_le32(((u32int *)block.data)[offset]);
+
+ if (ind_block == 0)
+ continue;
+ rc = ext4_trans_block_get(fs->bdev, &subblock,
+ ind_block);
+ if (rc != 0) {
+ ext4_block_set(fs->bdev, &block);
+ return rc;
+ }
+ /* Free each block referenced from this second-level block, then the second-level block itself */
+ ext4_fsblk_t ind_subblk;
+ for (suboff = 0; suboff < count; ++suboff) {
+ ind_subblk = to_le32(((u32int *)subblock.data)[suboff]);
+
+ if (ind_subblk == 0)
+ continue;
+ rc = ext4_balloc_free_block(inode_ref, ind_subblk);
+ if (rc != 0) {
+ ext4_block_set(fs->bdev, &subblock);
+ ext4_block_set(fs->bdev, &block);
+ return rc;
+ }
+
+ }
+
+ ext4_block_set(fs->bdev, &subblock);
+
+ rc = ext4_balloc_free_block(inode_ref,
+ ind_block);
+ if (rc != 0) {
+ ext4_block_set(fs->bdev, &block);
+ return rc;
+ }
+
+ }
+
+ ext4_block_set(fs->bdev, &block);
+ rc = ext4_balloc_free_block(inode_ref, fblock);
+ if (rc != 0)
+ return rc;
+
+ ext4_inode_set_indirect_block(inode_ref->inode, 2, 0);
+finish:
+ /* Mark inode dirty for writing; NOTE(review): the error returns above bypass this line, so it depends on the helpers whether cleared pointers get flushed - confirm */
+ inode_ref->dirty = true;
+
+ /* Free block with extended attributes if present */
+ ext4_fsblk_t xattr_block =
+ ext4_inode_get_file_acl(inode_ref->inode, &fs->sb);
+ if (xattr_block) {
+ int rc = ext4_balloc_free_block(inode_ref, xattr_block);
+ if (rc != 0)
+ return rc;
+
+ ext4_inode_set_file_acl(inode_ref->inode, &fs->sb, 0);
+ }
+
+ /* Free inode by allocator, telling it whether this was a directory */
+ if (ext4_inode_is_type(&fs->sb, inode_ref->inode,
+ EXT4_INODE_MODE_DIRECTORY))
+ rc = ext4_ialloc_free_inode(fs, inode_ref->index, true);
+ else
+ rc = ext4_ialloc_free_inode(fs, inode_ref->index, false);
+
+ return rc;
+}
+
+
+/**@brief Release data block from i-node
+ * @param inode_ref I-node to release block from
+ * @param iblock Logical block to be released
+ * @return 0 on success (holes included), -1 with errstr set when iblock is past the block-map limits, otherwise a helper's error code
+ */
+static int ext4_fs_release_inode_block(struct ext4_inode_ref *inode_ref,
+ ext4_lblk_t iblock)
+{
+ ext4_fsblk_t fblock;
+
+ struct ext4_fs *fs = inode_ref->fs;
+
+ /* Extent-mapped i-nodes are handled elsewhere; this function only
+ * supports the direct/indirect block map */
+ assert(!(
+ ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS) &&
+ (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))));
+
+ struct ext4_inode *inode = inode_ref->inode;
+
+ /* Handle simple case when we are dealing with direct reference */
+ if (iblock < EXT4_INODE_DIRECT_BLOCK_COUNT) {
+ fblock = ext4_inode_get_direct_block(inode, iblock);
+
+ /* Sparse file */
+ if (fblock == 0)
+ return 0;
+
+ ext4_inode_set_direct_block(inode, iblock, 0);
+ return ext4_balloc_free_block(inode_ref, fblock);
+ }
+
+ /* Determine the indirection level needed to get the desired block */
+ unsigned int level = 0;
+ unsigned int i;
+ for (i = 1; i < 4; i++) {
+ if (iblock < fs->inode_block_limits[i]) {
+ level = i;
+ break;
+ }
+ }
+ /* No level matched: iblock is beyond what the block map can address */
+ if (level == 0) {
+ werrstr(Eio);
+ return -1;
+ }
+
+ /* Compute offsets for the topmost level */
+ u32int block_offset_in_level =
+ (u32int)(iblock - fs->inode_block_limits[level - 1]);
+ ext4_fsblk_t current_block =
+ ext4_inode_get_indirect_block(inode, level - 1);
+ u32int offset_in_block =
+ (u32int)(block_offset_in_level / fs->inode_blocks_per_level[level - 1]);
+
+ /*
+ * Navigate through other levels, until we find the block number
+ * or find null reference meaning we are dealing with sparse file
+ */
+ struct ext4_block block;
+
+ while (level > 0) {
+
+ /* Sparse check */
+ if (current_block == 0)
+ return 0;
+
+ int rc = ext4_trans_block_get(fs->bdev, &block, current_block);
+ if (rc != 0)
+ return rc;
+
+ current_block =
+ to_le32(((u32int *)block.data)[offset_in_block]);
+
+ /* Last level: clear the slot that referenced the data block */
+ if (level == 1) {
+ ((u32int *)block.data)[offset_in_block] = to_le32(0);
+ ext4_trans_set_block_dirty(block.buf);
+ }
+
+ rc = ext4_block_set(fs->bdev, &block);
+ if (rc != 0)
+ return rc;
+
+ level--;
+
+ /*
+ * If we are on the last level, break here as
+ * there is no next level to visit
+ */
+ if (level == 0)
+ break;
+
+ /* Visit the next level */
+ block_offset_in_level %= fs->inode_blocks_per_level[level];
+ offset_in_block = (u32int)(block_offset_in_level /
+ fs->inode_blocks_per_level[level - 1]);
+ }
+ /* Only the data block is freed below; the indirect blocks that were walked stay allocated */
+ fblock = current_block;
+ if (fblock == 0)
+ return 0;
+
+ /* Physical block is not referenced, it can be released */
+ return ext4_balloc_free_block(inode_ref, fblock);
+}
+
+int ext4_fs_truncate_inode(struct ext4_inode_ref *inode_ref, u64int new_size)
+{
+ struct ext4_sblock *sb = &inode_ref->fs->sb;
+ u32int i;
+ int r;
+ bool v;
+ /* Shrink the i-node to new_size bytes, releasing blocks past the new end; returns 0, -1 with errstr set, or a helper's code */
+ /* Check flags, if i-node can be truncated */
+ if (!ext4_inode_can_truncate(sb, inode_ref->inode)) {
+ werrstr(Einval);
+ return -1;
+ }
+
+ /* If sizes are equal, nothing has to be done. */
+ u64int old_size = ext4_inode_get_size(sb, inode_ref->inode);
+ if (old_size == new_size)
+ return 0;
+
+ /* Growing a file through truncate is not supported */
+ if (old_size < new_size) {
+ werrstr(Einval);
+ return -1;
+ }
+
+ /* Small symlink without allocated blocks: its content is kept in inode->blocks, so zero the tail in place */
+ v = ext4_inode_is_type(sb, inode_ref->inode, EXT4_INODE_MODE_SOFTLINK);
+ if (v && old_size < sizeof(inode_ref->inode->blocks) &&
+ !ext4_inode_get_blocks_count(sb, inode_ref->inode)) {
+ char *content = (char *)inode_ref->inode->blocks + new_size;
+ memset(content, 0,
+ sizeof(inode_ref->inode->blocks) - (u32int)new_size);
+ ext4_inode_set_size(inode_ref->inode, new_size);
+ inode_ref->dirty = true;
+
+ return 0;
+ }
+ /* Device and socket i-nodes: only blocks[0..1] are cleared; NOTE(review): the size is not updated on this path */
+ i = ext4_inode_type(sb, inode_ref->inode);
+ if (i == EXT4_INODE_MODE_CHARDEV ||
+ i == EXT4_INODE_MODE_BLOCKDEV ||
+ i == EXT4_INODE_MODE_SOCKET) {
+ inode_ref->inode->blocks[0] = 0;
+ inode_ref->inode->blocks[1] = 0;
+
+ inode_ref->dirty = true;
+ return 0;
+ }
+
+ /* Compute how many blocks will be released */
+ u32int block_size = ext4_sb_get_block_size(sb);
+ u32int new_blocks_cnt = (u32int)((new_size + block_size - 1) / block_size);
+ u32int old_blocks_cnt = (u32int)((old_size + block_size - 1) / block_size);
+ u32int diff_blocks_cnt = old_blocks_cnt - new_blocks_cnt;
+
+ if ((ext4_sb_feature_incom(sb, EXT4_FINCOM_EXTENTS)) &&
+ (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {
+
+ /* Extents require special operation */
+ if (diff_blocks_cnt) {
+ r = ext4_extent_remove_space(inode_ref, new_blocks_cnt,
+ EXT4_EXTENT_MAX_BLOCKS);
+ if (r != 0)
+ return r;
+
+ }
+ } else {
+ /* Release data blocks from the end of file */
+
+ /* Logical blocks are numbered from 0, so the first block to
+ * release is new_blocks_cnt */
+ for (i = 0; i < diff_blocks_cnt; ++i) {
+ r = ext4_fs_release_inode_block(inode_ref,
+ new_blocks_cnt + i);
+ if (r != 0)
+ return r;
+ }
+ }
+
+ /* Update i-node */
+ ext4_inode_set_size(inode_ref->inode, new_size);
+ inode_ref->dirty = true;
+
+ return 0;
+}
+
+/**@brief Compute the allocation 'goal' for an inode from its index
+ * @param inode_ref Reference to inode, to allocate block for
+ * @return (index - 1) / inodes_per_group, i.e. the 0-based block group of the inode
+ */
+ext4_fsblk_t ext4_fs_inode_to_goal_block(struct ext4_inode_ref *inode_ref)
+{
+ u32int grp_inodes = ext4_get32(&inode_ref->fs->sb, inodes_per_group);
+ return (inode_ref->index - 1) / grp_inodes; /* NOTE(review): a group number typed as ext4_fsblk_t - confirm how callers use it */
+}
+
+/**@brief Compute 'goal' for allocation algorithm (For blockmap).
+ * @param inode_ref Reference to inode, to allocate block for
+ * @param goal Output: block after the inode's last mapped block, or the first block after the group's inode table
+ * @return 0 on success, otherwise a helper's error code
+ */
+int ext4_fs_indirect_find_goal(struct ext4_inode_ref *inode_ref,
+ ext4_fsblk_t *goal)
+{
+ int r;
+ struct ext4_sblock *sb = &inode_ref->fs->sb;
+ *goal = 0;
+
+ u64int inode_size = ext4_inode_get_size(sb, inode_ref->inode);
+ u32int block_size = ext4_sb_get_block_size(sb);
+ u32int iblock_cnt = (u32int)(inode_size / block_size);
+ /* Round the block count up when the size is not block-aligned */
+ if (inode_size % block_size != 0)
+ iblock_cnt++;
+
+ /* If inode has some blocks, get last block address + 1 */
+ if (iblock_cnt > 0) {
+ r = ext4_fs_get_inode_dblk_idx(inode_ref, iblock_cnt - 1,
+ goal, false);
+ if (r != 0)
+ return r;
+
+ if (*goal != 0) {
+ (*goal)++;
+ return r;
+ }
+
+ /* If goal == 0, sparse file -> continue */
+ }
+
+ /* Identify block group of inode */
+
+ u32int inodes_per_bg = ext4_get32(sb, inodes_per_group);
+ u32int block_group = (inode_ref->index - 1) / inodes_per_bg;
+ block_size = ext4_sb_get_block_size(sb); /* NOTE(review): looks like a redundant re-read of the value fetched above */
+
+ /* Load block group reference */
+ struct ext4_block_group_ref bg_ref;
+ r = ext4_fs_get_block_group_ref(inode_ref->fs, block_group, &bg_ref);
+ if (r != 0)
+ return r;
+
+ struct ext4_bgroup *bg = bg_ref.block_group;
+
+ /* Compute indexes */
+ u32int bg_count = ext4_block_group_cnt(sb);
+ ext4_fsblk_t itab_first_block = ext4_bg_get_inode_table_first_block(bg, sb);
+ u16int itab_item_size = ext4_get16(sb, inode_size);
+ u32int itab_bytes;
+
+ /* Check for last block group */
+ if (block_group < bg_count - 1) {
+ itab_bytes = inodes_per_bg * itab_item_size;
+ } else {
+ /* Last block group could be smaller */
+ u32int inodes_cnt = ext4_get32(sb, inodes_count);
+
+ itab_bytes = (inodes_cnt - ((bg_count - 1) * inodes_per_bg));
+ itab_bytes *= itab_item_size;
+ }
+ /* Size of the inode table in blocks, rounded up */
+ ext4_fsblk_t inode_table_blocks = itab_bytes / block_size;
+
+ if (itab_bytes % block_size)
+ inode_table_blocks++;
+ /* Fallback goal: the first block after this group's inode table */
+ *goal = itab_first_block + inode_table_blocks;
+
+ return ext4_fs_put_block_group_ref(&bg_ref);
+}
+
+static int ext4_fs_get_inode_dblk_idx_internal(struct ext4_inode_ref *inode_ref,
+				       ext4_lblk_t iblock, ext4_fsblk_t *fblock,
+				       bool extent_create,
+				       bool support_unwritten)
+{
+	struct ext4_fs *fs = inode_ref->fs;
+	/* Map logical block iblock to a physical block in *fblock (0 means hole); returns 0, -1 with errstr Eio, or a helper's code */
+	/* An empty file has no blocks to map */
+	if (ext4_inode_get_size(&fs->sb, inode_ref->inode) == 0) {
+		*fblock = 0;
+		return 0;
+	}
+
+	ext4_fsblk_t current_block;
+
+	USED(extent_create);
+	USED(support_unwritten);
+
+	/* Handle i-node using extents */
+	if ((ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) &&
+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {
+
+		ext4_fsblk_t current_fsblk;
+		int rc = ext4_extent_get_blocks(inode_ref, iblock, 1,
+						&current_fsblk, extent_create, nil);
+		if (rc != 0)
+			return rc;
+
+		current_block = current_fsblk;
+		*fblock = current_block;
+
+		return 0;
+	}
+
+	struct ext4_inode *inode = inode_ref->inode;
+
+	/* Direct block are read directly from array in i-node structure */
+	if (iblock < EXT4_INODE_DIRECT_BLOCK_COUNT) {
+		current_block =
+		    ext4_inode_get_direct_block(inode, (u32int)iblock);
+		*fblock = current_block;
+		return 0;
+	}
+
+	/* Determine indirection level of the target block */
+	unsigned int l = 0;
+	unsigned int i;
+	for (i = 1; i < 4; i++) {
+		if (iblock < fs->inode_block_limits[i]) {
+			l = i;
+			break;
+		}
+	}
+	/* No level matched: iblock is beyond what the block map can address */
+	if (l == 0) {
+		werrstr(Eio);
+		return -1;
+	}
+
+	/* Compute offsets for the topmost level */
+	u32int blk_off_in_lvl = (u32int)(iblock - fs->inode_block_limits[l - 1]);
+	current_block = ext4_inode_get_indirect_block(inode, l - 1);
+	u32int off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);
+
+	/* Sparse file */
+	if (current_block == 0) {
+		*fblock = 0;
+		return 0;
+	}
+
+	struct ext4_block block;
+
+	/*
+	 * Navigate through other levels, until we find the block number
+	 * or find null reference meaning we are dealing with sparse file
+	 */
+	while (l > 0) {
+		/* Load indirect block */
+		int rc = ext4_trans_block_get(fs->bdev, &block, current_block);
+		if (rc != 0)
+			return rc;
+
+		/* Read block address from indirect block */
+		current_block =
+		    to_le32(((u32int *)block.data)[off_in_blk]);
+
+		/* Put back indirect block untouched */
+		rc = ext4_block_set(fs->bdev, &block);
+		if (rc != 0)
+			return rc;
+
+		/* Check for sparse file */
+		if (current_block == 0) {
+			*fblock = 0;
+			return 0;
+		}
+
+		/* Jump to the next level */
+		l--;
+
+		/* Termination condition - we have address of data block loaded
+		 */
+		if (l == 0)
+			break;
+
+		/* Visit the next level */
+		blk_off_in_lvl %= fs->inode_blocks_per_level[l];
+		off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);
+	}
+
+	*fblock = current_block;
+
+	return 0;
+}
+
+
+int ext4_fs_get_inode_dblk_idx(struct ext4_inode_ref *inode_ref,
+ ext4_lblk_t iblock, ext4_fsblk_t *fblock,
+ bool support_unwritten)
+{ /* Lookup of logical block iblock into *fblock; passes extent_create=false to the internal helper */
+ return ext4_fs_get_inode_dblk_idx_internal(inode_ref, iblock, fblock,
+ false, support_unwritten);
+}
+
+int ext4_fs_init_inode_dblk_idx(struct ext4_inode_ref *inode_ref,
+ ext4_lblk_t iblock, ext4_fsblk_t *fblock)
+{ /* Same lookup, but passes extent_create=true and support_unwritten=true to the internal helper */
+ return ext4_fs_get_inode_dblk_idx_internal(inode_ref, iblock, fblock,
+ true, true);
+}
+
+static int ext4_fs_set_inode_data_block_index(struct ext4_inode_ref *inode_ref,
+ ext4_lblk_t iblock, ext4_fsblk_t fblock)
+{
+ struct ext4_fs *fs = inode_ref->fs;
+ /* Record fblock as logical block iblock in the direct/indirect block map, allocating missing indirect blocks; returns 0, -1 with errstr set, or a helper's code */
+ /* Handle inode using extents */
+ if ((ext4_sb_feature_incom(&fs->sb, EXT4_FINCOM_EXTENTS)) &&
+ (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {
+ /* Not reachable */
+ werrstr("impossible feature combination in extents");
+ return -1;
+ }
+
+ /* Handle simple case when we are dealing with direct reference */
+ if (iblock < EXT4_INODE_DIRECT_BLOCK_COUNT) {
+ ext4_inode_set_direct_block(inode_ref->inode, (u32int)iblock,
+ (u32int)fblock);
+ inode_ref->dirty = true;
+
+ return 0;
+ }
+
+ /* Determine the indirection level needed to get the desired block */
+ unsigned int l = 0;
+ unsigned int i;
+ for (i = 1; i < 4; i++) {
+ if (iblock < fs->inode_block_limits[i]) {
+ l = i;
+ break;
+ }
+ }
+ /* No level matched: iblock is beyond what the block map can address */
+ if (l == 0) {
+ werrstr(Eio);
+ return -1;
+ }
+
+ u32int block_size = ext4_sb_get_block_size(&fs->sb);
+
+ /* Compute offsets for the topmost level */
+ u32int blk_off_in_lvl = (u32int)(iblock - fs->inode_block_limits[l - 1]);
+ ext4_fsblk_t current_block =
+ ext4_inode_get_indirect_block(inode_ref->inode, l - 1);
+ u32int off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);
+
+ ext4_fsblk_t new_blk;
+
+ struct ext4_block block;
+ struct ext4_block new_block;
+
+ /* The top-level indirect block referenced from the i-node is missing: allocate it */
+ if (current_block == 0) {
+ /* Allocate new indirect block */
+ ext4_fsblk_t goal;
+ int rc = ext4_fs_indirect_find_goal(inode_ref, &goal);
+ if (rc != 0)
+ return rc;
+
+ rc = ext4_balloc_alloc_block(inode_ref, goal, &new_blk);
+ if (rc != 0)
+ return rc;
+
+ /* Update i-node */
+ ext4_inode_set_indirect_block(inode_ref->inode, l - 1,
+ (u32int)new_blk);
+ inode_ref->dirty = true;
+
+ /* Load newly allocated block */
+ rc = ext4_trans_block_get_noread(fs->bdev, &new_block, new_blk);
+ if (rc != 0) {
+ ext4_balloc_free_block(inode_ref, new_blk); /* NOTE(review): the i-node still points at new_blk after this free - confirm */
+ return rc;
+ }
+
+ /* Initialize new block */
+ memset(new_block.data, 0, block_size);
+ ext4_trans_set_block_dirty(new_block.buf);
+
+ /* Put back the allocated block */
+ rc = ext4_block_set(fs->bdev, &new_block);
+ if (rc != 0)
+ return rc;
+
+ current_block = new_blk;
+ }
+
+ /*
+ * Walk down the remaining levels, allocating any missing
+ * intermediate indirect block, and store fblock at the last level
+ */
+ while (l > 0) {
+ int rc = ext4_trans_block_get(fs->bdev, &block, current_block);
+ if (rc != 0)
+ return rc;
+
+ current_block = to_le32(((u32int *)block.data)[off_in_blk]);
+ if ((l > 1) && (current_block == 0)) {
+ ext4_fsblk_t goal;
+ rc = ext4_fs_indirect_find_goal(inode_ref, &goal);
+ if (rc != 0) {
+ ext4_block_set(fs->bdev, &block);
+ return rc;
+ }
+
+ /* Allocate new block */
+ rc =
+ ext4_balloc_alloc_block(inode_ref, goal, &new_blk);
+ if (rc != 0) {
+ ext4_block_set(fs->bdev, &block);
+ return rc;
+ }
+
+ /* Load newly allocated block */
+ rc = ext4_trans_block_get_noread(fs->bdev, &new_block,
+ new_blk);
+ /* NOTE(review): on failure new_blk stays allocated but is never referenced or freed - confirm */
+ if (rc != 0) {
+ ext4_block_set(fs->bdev, &block);
+ return rc;
+ }
+
+ /* Initialize allocated block */
+ memset(new_block.data, 0, block_size);
+ ext4_trans_set_block_dirty(new_block.buf);
+
+ rc = ext4_block_set(fs->bdev, &new_block);
+ if (rc != 0) {
+ ext4_block_set(fs->bdev, &block);
+ return rc;
+ }
+
+ /* Write block address to the parent */
+ u32int * p = (u32int * )block.data;
+ p[off_in_blk] = to_le32((u32int)new_blk);
+ ext4_trans_set_block_dirty(block.buf);
+ current_block = new_blk;
+ }
+
+ /* Will be finished, write the fblock address */
+ if (l == 1) {
+ u32int * p = (u32int * )block.data;
+ p[off_in_blk] = to_le32((u32int)fblock);
+ ext4_trans_set_block_dirty(block.buf);
+ }
+
+ rc = ext4_block_set(fs->bdev, &block);
+ if (rc != 0)
+ return rc;
+
+ l--;
+
+ /*
+ * If we are on the last level, break here as
+ * there is no next level to visit
+ */
+ if (l == 0)
+ break;
+
+ /* Visit the next level */
+ blk_off_in_lvl %= fs->inode_blocks_per_level[l];
+ off_in_blk = (u32int)(blk_off_in_lvl / fs->inode_blocks_per_level[l - 1]);
+ }
+
+ return 0;
+}
+
+
+int ext4_fs_append_inode_dblk(struct ext4_inode_ref *inode_ref,
+			      ext4_fsblk_t *fblock, ext4_lblk_t *iblock)
+{
+	/* Append one data block: *iblock gets its logical index, *fblock its physical address, the size grows by one block. Extents first. */
+	if ((ext4_sb_feature_incom(&inode_ref->fs->sb, EXT4_FINCOM_EXTENTS)) &&
+	    (ext4_inode_has_flag(inode_ref->inode, EXT4_INODE_FLAG_EXTENTS))) {
+		int rc;
+		ext4_fsblk_t current_fsblk;
+		struct ext4_sblock *sb = &inode_ref->fs->sb;
+		u64int inode_size = ext4_inode_get_size(sb, inode_ref->inode);
+		u32int block_size = ext4_sb_get_block_size(sb);
+		*iblock = (u32int)((inode_size + block_size - 1) / block_size);
+
+		rc = ext4_extent_get_blocks(inode_ref, *iblock, 1,
+					    &current_fsblk, true, nil);
+		if (rc != 0)
+			return rc;
+
+		*fblock = current_fsblk;
+		assert(*fblock);
+
+		ext4_inode_set_size(inode_ref->inode, inode_size + block_size);
+		inode_ref->dirty = true;
+
+
+		return rc;
+	}
+
+	struct ext4_sblock *sb = &inode_ref->fs->sb;
+
+	/* Compute next block index and allocate data block */
+	u64int inode_size = ext4_inode_get_size(sb, inode_ref->inode);
+	u32int block_size = ext4_sb_get_block_size(sb);
+
+	/* Round the i-node size up to a block boundary */
+	if ((inode_size % block_size) != 0)
+		inode_size += block_size - (inode_size % block_size);
+
+	/* Logical blocks are numbered from 0 */
+	u32int new_block_idx = (u32int)(inode_size / block_size);
+
+	/* Allocate new physical block */
+	ext4_fsblk_t goal, phys_block;
+	int rc = ext4_fs_indirect_find_goal(inode_ref, &goal);
+	if (rc != 0)
+		return rc;
+
+	rc = ext4_balloc_alloc_block(inode_ref, goal, &phys_block);
+	if (rc != 0)
+		return rc;
+
+	/* Add physical block address to the i-node */
+	rc = ext4_fs_set_inode_data_block_index(inode_ref, new_block_idx,
+						phys_block);
+	if (rc != 0) {
+		ext4_balloc_free_block(inode_ref, phys_block);
+		return rc;
+	}
+
+	/* Update i-node */
+	ext4_inode_set_size(inode_ref->inode, inode_size + block_size);
+	inode_ref->dirty = true;
+
+	*fblock = phys_block;
+	*iblock = new_block_idx;
+
+	return 0;
+}
+
+void ext4_fs_inode_links_count_inc(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_sblock *sblock = &inode_ref->fs->sb;
+	struct ext4_inode *ino = inode_ref->inode;
+	u16int count = ext4_inode_get_links_cnt(ino) + 1;
+	bool indexed;
+	u32int ro_features;
+
+	/* Store the incremented count first; it may be overridden below. */
+	ext4_inode_set_links_cnt(ino, count);
+
+	indexed = ext4_sb_feature_com(sblock, EXT4_FCOM_DIR_INDEX) &&
+		  ext4_inode_has_flag(ino, EXT4_INODE_FLAG_INDEX);
+	if (!indexed || count <= 1 || (count < EXT4_LINK_MAX && count != 2))
+		return;
+
+	/* Indexed i-node at 2 links or at the limit: pin the count to 1 and
+	 * set the DIR_NLINK read-only-compatible feature in the superblock. */
+	ext4_inode_set_links_cnt(ino, 1);
+	ro_features = ext4_get32(sblock, features_read_only) | EXT4_FRO_COM_DIR_NLINK;
+	ext4_set32(sblock, features_read_only, ro_features);
+}
+
+void ext4_fs_inode_links_count_dec(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_inode *ino = inode_ref->inode;
+	u16int count = ext4_inode_get_links_cnt(ino);
+	u16int keep;
+
+	/* A directory is never taken below 2 links here; any other
+	 * i-node may go down to 0 but is never decremented past it. */
+	keep = ext4_inode_is_type(&inode_ref->fs->sb, ino,
+				  EXT4_INODE_MODE_DIRECTORY) ? 2 : 0;
+	if (count > keep)
+		ext4_inode_set_links_cnt(ino, count - 1);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_hash.c
@@ -1,0 +1,313 @@
+/*
+ * Copyright (c) 2013 Grzegorz Kostka ([email protected])
+ *
+ * FreeBSD:
+ * Copyright (c) 2010, 2013 Zheng Liu <[email protected]>
+ * Copyright (c) 2012, Vyacheslav Matyushin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * The following notice applies to the code in ext2_half_md4():
+ *
+ * Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved.
+ *
+ * License to copy and use this software is granted provided that it
+ * is identified as the "RSA Data Security, Inc. MD4 Message-Digest
+ * Algorithm" in all material mentioning or referencing this software
+ * or this function.
+ *
+ * License is also granted to make and use derivative works provided
+ * that such works are identified as "derived from the RSA Data
+ * Security, Inc. MD4 Message-Digest Algorithm" in all material
+ * mentioning or referencing the derived work.
+ *
+ * RSA Data Security, Inc. makes no representations concerning either
+ * the merchantability of this software or the suitability of this
+ * software for any particular purpose. It is provided "as is"
+ * without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this
+ * documentation and/or software.
+ */
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+
+/* F, G, and H are MD4 functions; every argument use is parenthesized */
+#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
+#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+
+/* ROTATE_LEFT rotates a 32-bit unsigned x left by n bits, 0 < n < 32 */
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+
+/*
+ * FF, GG, and HH are transformations for rounds 1, 2, and 3.
+ * Rotation is separated from addition to prevent recomputation.
+ * Each expands to one statement, so it is safe in an unbraced if/else.
+ */
+#define FF(a, b, c, d, x, s) \
+	do { \
+		(a) += F((b), (c), (d)) + (x); \
+		(a) = ROTATE_LEFT((a), (s)); \
+	} \
+	while (0)
+
+#define GG(a, b, c, d, x, s) \
+	do { \
+		(a) += G((b), (c), (d)) + (x) + (u32int)0x5A827999; \
+		(a) = ROTATE_LEFT((a), (s)); \
+	} \
+	while (0)
+
+#define HH(a, b, c, d, x, s) \
+	do { \
+		(a) += H((b), (c), (d)) + (x) + (u32int)0x6ED9EBA1; \
+		(a) = ROTATE_LEFT((a), (s)); \
+	} \
+	while (0)
+
+/*
+ * Half-MD4 transformation: mixes the 8 words of data[] into the 4-word hash[] state, in place.
+ *
+ * This is a half md4 algorithm since Linux uses this algorithm for dir
+ * index. This function is derived from the RSA Data Security, Inc. MD4
+ * Message-Digest Algorithm and was modified as necessary.
+ *
+ * The return value of this function is u32int in Linux, but actually we don't
+ * need to check this value, so in our version this function doesn't return any
+ * value.
+ */
+static void ext2_half_md4(u32int hash[4], u32int data[8])
+{
+ u32int a = hash[0], b = hash[1], c = hash[2], d = hash[3];
+
+ /* Round 1 */
+ FF(a, b, c, d, data[0], 3);
+ FF(d, a, b, c, data[1], 7);
+ FF(c, d, a, b, data[2], 11);
+ FF(b, c, d, a, data[3], 19);
+ FF(a, b, c, d, data[4], 3);
+ FF(d, a, b, c, data[5], 7);
+ FF(c, d, a, b, data[6], 11);
+ FF(b, c, d, a, data[7], 19);
+
+ /* Round 2 */
+ GG(a, b, c, d, data[1], 3);
+ GG(d, a, b, c, data[3], 5);
+ GG(c, d, a, b, data[5], 9);
+ GG(b, c, d, a, data[7], 13);
+ GG(a, b, c, d, data[0], 3);
+ GG(d, a, b, c, data[2], 5);
+ GG(c, d, a, b, data[4], 9);
+ GG(b, c, d, a, data[6], 13);
+
+ /* Round 3 */
+ HH(a, b, c, d, data[3], 3);
+ HH(d, a, b, c, data[7], 9);
+ HH(c, d, a, b, data[2], 11);
+ HH(b, c, d, a, data[6], 15);
+ HH(a, b, c, d, data[1], 3);
+ HH(d, a, b, c, data[5], 9);
+ HH(c, d, a, b, data[0], 11);
+ HH(b, c, d, a, data[4], 15);
+ /* Add the transformed words back into the caller's state */
+ hash[0] += a;
+ hash[1] += b;
+ hash[2] += c;
+ hash[3] += d;
+}
+
+/*
+ * TEA mixing step: folds data[0..3] into hash[0] and hash[1], in place.
+ */
+static void ext2_tea(u32int hash[4], u32int data[8])
+{
+	const u32int delta = 0x9E3779B9;
+	u32int v0 = hash[0];
+	u32int v1 = hash[1];
+	u32int sum = 0;
+	int round;
+
+	/* 16 rounds; sum steps through delta, 2*delta, ... with u32int wraparound */
+	for (round = 0; round < 16; round++) {
+		sum += delta;
+		v0 += ((v1 << 4) + data[0]) ^ (v1 + sum) ^ ((v1 >> 5) + data[1]);
+		v1 += ((v0 << 4) + data[2]) ^ (v0 + sum) ^ ((v0 >> 5) + data[3]);
+	}
+
+	hash[0] += v0;
+	hash[1] += v1;
+}
+
+static u32int ext2_legacy_hash(const char *name, int len, int unsigned_char)
+{
+	const u32int factor = 0x6D22F5;
+	u32int prev = 0x37ABE8F9;
+	u32int cur = 0x12A3FE2D;
+	u32int next;
+	int ch, pos;
+
+	for (pos = 0; pos < len; pos++) {
+		/* Bytes are read as unsigned or signed char, per the caller's flag. */
+		ch = unsigned_char ? (int)((const unsigned char *)name)[pos]
+				   : (int)((const signed char *)name)[pos];
+
+		next = prev + (cur ^ (ch * factor));
+		if (next & 0x80000000)
+			next -= 0x7FFFFFFF;
+		prev = cur;
+		cur = next;
+	}
+
+	/* The state is shifted left once, so bit 0 of the result is clear. */
+	return cur << 1;
+}
+
+static void ext2_prep_hashbuf(const char *src, u32int slen, u32int *dst,
+ int dlen, int unsigned_char)
+{
+ u32int padding = slen | (slen << 8) | (slen << 16) | (slen << 24);
+ u32int buf_val;
+ int len, i;
+ int buf_byte;
+ const unsigned char *ubuf = (const unsigned char *)src;
+ const signed char *sbuf = (const signed char *)src;
+ /* Pack at most dlen bytes of src into 32-bit words at dst; dlen is the destination size in bytes and unused space is filled with `padding` */
+ if (slen > (u32int)dlen)
+ len = dlen;
+ else
+ len = slen;
+
+ buf_val = padding;
+
+ for (i = 0; i < len; i++) {
+ if (unsigned_char)
+ buf_byte = (unsigned int)ubuf[i];
+ else
+ buf_byte = (int)sbuf[i];
+ /* Each 4-byte group starts from the padding word */
+ if ((i % 4) == 0)
+ buf_val = padding;
+
+ buf_val <<= 8;
+ buf_val += buf_byte;
+ /* Group complete: store the word and account for its 4 bytes */
+ if ((i % 4) == 3) {
+ *dst++ = buf_val;
+ dlen -= sizeof(u32int);
+ buf_val = padding;
+ }
+ }
+ /* Store the pending (possibly partial) word if room remains */
+ dlen -= sizeof(u32int);
+ if (dlen >= 0)
+ *dst++ = buf_val;
+ /* Fill whatever is left of dst with the padding word */
+ dlen -= sizeof(u32int);
+ while (dlen >= 0) {
+ *dst++ = padding;
+ dlen -= sizeof(u32int);
+ }
+}
+
+int ext2_htree_hash(const char *name, int len, const u32int *hash_seed,
+		    int hash_version, u32int *hash_major,
+		    u32int *hash_minor)
+{
+	u32int hash[4];
+	u32int data[8];
+	u32int major, minor = 0;
+	int unsigned_char = 0;
+	/* Hash name[0..len) with the given htree hash version; 0 on success, -1 on bad arguments, length or version (outputs zeroed when hash_major is set) */
+	if (!name || !hash_major)
+		return (-1);
+
+	if (len < 1 || len > 255) {
+		werrstr("invalid name length: %d", len);
+		goto fail;
+	}
+	hash[0] = 0x67452301;
+	hash[1] = 0xEFCDAB89;
+	hash[2] = 0x98BADCFE;
+	hash[3] = 0x10325476;
+	if (hash_seed)
+		memcpy(hash, hash_seed, sizeof(hash));
+
+	switch (hash_version) {
+	case EXT2_HTREE_TEA_UNSIGNED:
+		unsigned_char = 1;
+		/* FALLTHRU */
+	case EXT2_HTREE_TEA:
+		while (len > 0) {
+			ext2_prep_hashbuf(name, len, data, 16, unsigned_char);
+			ext2_tea(hash, data);
+			len -= 16;
+			name += 16;
+		}
+		major = hash[0];
+		minor = hash[1];
+		break;
+	case EXT2_HTREE_LEGACY_UNSIGNED:
+		unsigned_char = 1;
+		/* FALLTHRU */
+	case EXT2_HTREE_LEGACY:
+		major = ext2_legacy_hash(name, len, unsigned_char);
+		break;
+	case EXT2_HTREE_HALF_MD4_UNSIGNED:
+		unsigned_char = 1;
+		/* FALLTHRU */
+	case EXT2_HTREE_HALF_MD4:
+		while (len > 0) {
+			ext2_prep_hashbuf(name, len, data, 32, unsigned_char);
+			ext2_half_md4(hash, data);
+			len -= 32;
+			name += 32;
+		}
+		major = hash[1];
+		minor = hash[2];
+		break;
+	default:
+		goto error;
+	}
+
+	major &= ~1;
+	if (major == (EXT2_HTREE_EOF << 1))
+		major = (EXT2_HTREE_EOF - 1) << 1;
+	*hash_major = major;
+	if (hash_minor)
+		*hash_minor = minor;
+	return 0;
+
+error:
+	werrstr("unsupported hash version: %d", hash_version);
+fail:
+	*hash_major = 0;
+	if (hash_minor)
+		*hash_minor = 0;
+	return -1;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_ialloc.c
@@ -1,0 +1,313 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_trans.h"
+#include "ext4_ialloc.h"
+#include "ext4_super.h"
+#include "ext4_crc32.h"
+#include "ext4_fs.h"
+#include "ext4_blockdev.h"
+#include "ext4_block_group.h"
+#include "ext4_bitmap.h"
+
+/**@brief Convert i-node number to relative index in block group.
+ * @param sb Superblock
+ * @param inode I-node number to be converted
+ * @return Index of the i-node in the block group
+ */
+static u32int ext4_ialloc_inode_to_bgidx(struct ext4_sblock *sb,
+ u32int inode)
+{
+ u32int inodes_per_group = ext4_get32(sb, inodes_per_group);
+ return (inode - 1) % inodes_per_group;
+}
+
+/**@brief Convert relative index of i-node to absolute i-node number.
+ * @param sb Superblock
+ * @param index Index to be converted
+ * @param bgid Block group the index is relative to
+ * @return Absolute number of the i-node
+ */
+static u32int ext4_ialloc_bgidx_to_inode(struct ext4_sblock *sb,
+ u32int index, u32int bgid)
+{
+ u32int inodes_per_group = ext4_get32(sb, inodes_per_group);
+ return bgid * inodes_per_group + (index + 1);
+}
+
+/**@brief Compute block group number from the i-node number.
+ * @param sb Superblock
+ * @param inode I-node number whose block group is wanted
+ * @return Block group number computed from i-node number
+ */
+static u32int ext4_ialloc_get_bgid_of_inode(struct ext4_sblock *sb,
+ u32int inode)
+{
+ u32int inodes_per_group = ext4_get32(sb, inodes_per_group);
+ return (inode - 1) / inodes_per_group;
+}
+
+static u32int ext4_ialloc_bitmap_csum(struct ext4_sblock *sb, void *bitmap)
+{
+ u32int csum = 0;
+ if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
+ u32int inodes_per_group =
+ ext4_get32(sb, inodes_per_group);
+
+ /* First calculate crc32 checksum against fs uuid */
+ csum = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
+ /* Then calculate crc32 checksum against inode bitmap */
+ csum = ext4_crc32c(csum, bitmap, (inodes_per_group + 7) / 8);
+ }
+ return csum;
+}
+
+void ext4_ialloc_set_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg, void *bitmap)
+{
+ int desc_size = ext4_sb_get_desc_size(sb);
+ u32int csum = ext4_ialloc_bitmap_csum(sb, bitmap);
+ u16int lo_csum = to_le16(csum & 0xFFFF),
+ hi_csum = to_le16(csum >> 16);
+
+ if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+ return;
+
+ /* See if we need to assign a 32bit checksum */
+ bg->inode_bitmap_csum_lo = lo_csum;
+ if (desc_size == EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+ bg->inode_bitmap_csum_hi = hi_csum;
+
+}
+
+static bool
+ext4_ialloc_verify_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg, void *bitmap)
+{
+ int desc_size = ext4_sb_get_desc_size(sb);
+ u32int csum = ext4_ialloc_bitmap_csum(sb, bitmap);
+ u16int lo_csum = to_le16(csum & 0xFFFF),
+ hi_csum = to_le16(csum >> 16);
+
+ if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM))
+ return true;
+
+ if (bg->inode_bitmap_csum_lo != lo_csum)
+ return false;
+
+ if (desc_size == EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+ if (bg->inode_bitmap_csum_hi != hi_csum)
+ return false;
+
+ return true;
+}
+
+/**@brief Release i-node number @index: clear its bitmap bit, fix up counters.
+ * @param is_dir  true when the i-node was a directory (used dirs is decremented)
+ * @return 0 on success, otherwise the error code of the failing helper */
+int ext4_ialloc_free_inode(struct ext4_fs *fs, u32int index, bool is_dir)
+{
+	struct ext4_sblock *sb = &fs->sb;
+	/* Compute index of block group and load it */
+	u32int block_group = ext4_ialloc_get_bgid_of_inode(sb, index);
+	struct ext4_block_group_ref bg_ref;
+	int rc = ext4_fs_get_block_group_ref(fs, block_group, &bg_ref);
+	if (rc != 0)
+		return rc;
+
+	struct ext4_bgroup *bg = bg_ref.block_group;
+
+	/* Load i-node bitmap */
+	ext4_fsblk_t bitmap_block_addr = ext4_bg_get_inode_bitmap(bg, sb);
+	struct ext4_block b;
+	rc = ext4_trans_block_get(fs->bdev, &b, bitmap_block_addr);
+	if (rc != 0) {
+		/* Do not leak the block group reference taken above */
+		ext4_fs_put_block_group_ref(&bg_ref);
+		return rc;
+	}
+
+	if (!ext4_ialloc_verify_bitmap_csum(sb, bg, b.data)) {
+		ext4_dbg(DEBUG_IALLOC,
+			DBG_WARN "Bitmap checksum failed."
+			"Group: %ud\n",
+			bg_ref.index);
+	}
+
+	/* Free i-node in the bitmap */
+	u32int index_in_group = ext4_ialloc_inode_to_bgidx(sb, index);
+	ext4_bmap_bit_clr(b.data, index_in_group);
+	ext4_ialloc_set_bitmap_csum(sb, bg, b.data);
+	ext4_trans_set_block_dirty(b.buf);
+
+	/* Put back the block with bitmap */
+	rc = ext4_block_set(fs->bdev, &b);
+	if (rc != 0) {
+		/* Error in saving bitmap */
+		ext4_fs_put_block_group_ref(&bg_ref);
+		return rc;
+	}
+
+	/* If released i-node is a directory, decrement used directories count */
+	if (is_dir) {
+		u32int bg_used_dirs = ext4_bg_get_used_dirs_count(bg, sb);
+		bg_used_dirs--;
+		ext4_bg_set_used_dirs_count(bg, sb, bg_used_dirs);
+	}
+
+	/* Update block group free inodes count */
+	u32int free_inodes = ext4_bg_get_free_inodes_count(bg, sb);
+	free_inodes++;
+	ext4_bg_set_free_inodes_count(bg, sb, free_inodes);
+
+	bg_ref.dirty = true;
+
+	/* Put back the modified block group */
+	rc = ext4_fs_put_block_group_ref(&bg_ref);
+	if (rc != 0)
+		return rc;
+
+	/* Update superblock free inodes count */
+	ext4_set32(sb, free_inodes_count, ext4_get32(sb, free_inodes_count) + 1);
+
+	return 0;
+}
+
+/**@brief Allocate a free i-node, scanning block groups from the last one used.
+ * @param idx  Output: absolute number of the allocated i-node
+ * @return 0 on success, a helper's error code, or -1 (Enospc) if none is free */
+int ext4_ialloc_alloc_inode(struct ext4_fs *fs, u32int *idx, bool is_dir)
+{
+	struct ext4_sblock *sb = &fs->sb;
+
+	u32int bgid = fs->last_inode_bg_id;
+	u32int bg_count = ext4_block_group_cnt(sb);
+	u32int sb_free_inodes = ext4_get32(sb, free_inodes_count);
+	bool rewind = false;
+
+	/* Try to find free i-node in all block groups */
+	while (bgid <= bg_count) {
+		if (bgid == bg_count) {
+			if (rewind)
+				break;
+			bg_count = fs->last_inode_bg_id;
+			bgid = 0;
+			rewind = true;
+			continue;
+		}
+
+		/* Load block group to check */
+		struct ext4_block_group_ref bg_ref;
+		int rc = ext4_fs_get_block_group_ref(fs, bgid, &bg_ref);
+		if (rc != 0)
+			return rc;
+
+		struct ext4_bgroup *bg = bg_ref.block_group;
+
+		/* Read necessary values for algorithm */
+		u32int free_inodes = ext4_bg_get_free_inodes_count(bg, sb);
+		u32int used_dirs = ext4_bg_get_used_dirs_count(bg, sb);
+
+		/* Check if this block group is good candidate for allocation */
+		if (free_inodes > 0) {
+			/* Load block with bitmap */
+			ext4_fsblk_t bmp_blk_add = ext4_bg_get_inode_bitmap(bg, sb);
+
+			struct ext4_block b;
+			rc = ext4_trans_block_get(fs->bdev, &b, bmp_blk_add);
+			if (rc != 0) {
+				ext4_fs_put_block_group_ref(&bg_ref);
+				return rc;
+			}
+
+			if (!ext4_ialloc_verify_bitmap_csum(sb, bg, b.data)) {
+				ext4_dbg(DEBUG_IALLOC,
+					DBG_WARN "Bitmap checksum failed."
+					"Group: %ud\n",
+					bg_ref.index);
+			}
+
+			/* Try to allocate i-node in the bitmap */
+			u32int inodes_in_bg;
+			u32int idx_in_bg;
+
+			inodes_in_bg = ext4_inodes_in_group_cnt(sb, bgid);
+			bool no_space;
+			rc = ext4_bmap_bit_find_clr(b.data, 0, inodes_in_bg, &idx_in_bg, &no_space);
+			/* Block group does not have any free i-node */
+			if (no_space) {
+				rc = ext4_block_set(fs->bdev, &b);
+				if (rc != 0) {
+					ext4_fs_put_block_group_ref(&bg_ref);
+					return rc;
+				}
+
+				rc = ext4_fs_put_block_group_ref(&bg_ref);
+				if (rc != 0)
+					return rc;
+
+				/* Advance, or this same full group is rescanned forever */
+				++bgid;
+				continue;
+			}
+
+			ext4_bmap_bit_set(b.data, idx_in_bg);
+
+			/* Free i-node found, save the bitmap */
+			ext4_ialloc_set_bitmap_csum(sb, bg, b.data);
+			ext4_trans_set_block_dirty(b.buf);
+
+			rc = ext4_block_set(fs->bdev, &b);
+			if (rc != 0) {
+				ext4_fs_put_block_group_ref(&bg_ref);
+				return rc;
+			}
+
+			/* Modify filesystem counters */
+			free_inodes--;
+			ext4_bg_set_free_inodes_count(bg, sb, free_inodes);
+
+			/* Increment used directories counter */
+			if (is_dir) {
+				used_dirs++;
+				ext4_bg_set_used_dirs_count(bg, sb, used_dirs);
+			}
+
+			/* Decrease unused inodes count */
+			u32int unused = ext4_bg_get_itable_unused(bg, sb);
+			u32int free = inodes_in_bg - unused;
+
+			if (idx_in_bg >= free) {
+				unused = inodes_in_bg - (idx_in_bg + 1);
+				ext4_bg_set_itable_unused(bg, sb, unused);
+			}
+
+			/* Save modified block group */
+			bg_ref.dirty = true;
+
+			rc = ext4_fs_put_block_group_ref(&bg_ref);
+			if (rc != 0)
+				return rc;
+
+			/* Update superblock */
+			sb_free_inodes--;
+			ext4_set32(sb, free_inodes_count, sb_free_inodes);
+
+			/* Compute the absolute i-node number */
+			*idx = ext4_ialloc_bgidx_to_inode(sb, idx_in_bg, bgid);
+
+			fs->last_inode_bg_id = bgid;
+
+			return 0;
+		}
+
+		/* Block group not modified, put it and jump to the next one */
+		rc = ext4_fs_put_block_group_ref(&bg_ref);
+		if (rc != 0)
+			return rc;
+
+		++bgid;
+	}
+
+	werrstr(Enospc);
+	return -1;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_inode.c
@@ -1,0 +1,365 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_inode.h"
+#include "ext4_super.h"
+
+/**@brief Compute number of bits for block count.
+ * @param block_size Filesystem block_size
+ * @return Number of bits
+ */
+static u32int ext4_inode_block_bits_count(u32int block_size)
+{
+ u32int bits = 8;
+ u32int size = block_size;
+
+ do {
+ bits++;
+ size = size >> 1;
+ } while (size > 256);
+
+ return bits;
+}
+
+u32int ext4_inode_get_mode(struct ext4_sblock *sb, struct ext4_inode *inode)
+{
+ u32int v = to_le16(inode->mode);
+
+ if (ext4_get32(sb, creator_os) == EXT4_SUPERBLOCK_OS_HURD) {
+ v |= ((u32int)to_le16(inode->osd2.hurd2.mode_high)) << 16;
+ }
+
+ return v;
+}
+
+void ext4_inode_set_mode(struct ext4_sblock *sb, struct ext4_inode *inode,
+ u32int mode)
+{
+ inode->mode = to_le16((mode << 16) >> 16);
+
+ if (ext4_get32(sb, creator_os) == EXT4_SUPERBLOCK_OS_HURD)
+ inode->osd2.hurd2.mode_high = to_le16(mode >> 16);
+}
+
+u32int ext4_inode_get_uid(struct ext4_inode *inode)
+{
+ return to_le32(inode->uid);
+}
+
+void ext4_inode_set_uid(struct ext4_inode *inode, u32int uid)
+{
+ inode->uid = to_le32(uid);
+}
+
+u64int ext4_inode_get_size(struct ext4_sblock *sb, struct ext4_inode *inode)
+{
+ u64int v = to_le32(inode->size_lo);
+
+ if ((ext4_get32(sb, rev_level) > 0) &&
+ (ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_FILE)))
+ v |= ((u64int)to_le32(inode->size_hi)) << 32;
+
+ return v;
+}
+
+void ext4_inode_set_size(struct ext4_inode *inode, u64int size)
+{
+ inode->size_lo = to_le32((size << 32) >> 32);
+ inode->size_hi = to_le32(size >> 32);
+}
+
+u32int ext4_inode_get_csum(struct ext4_sblock *sb, struct ext4_inode *inode)
+{
+ u16int inode_size = ext4_get16(sb, inode_size);
+ u32int v = to_le16(inode->osd2.linux2.checksum_lo);
+
+ if (inode_size > EXT4_GOOD_OLD_INODE_SIZE)
+ v |= ((u32int)to_le16(inode->checksum_hi)) << 16;
+
+ return v;
+}
+
+void ext4_inode_set_csum(struct ext4_sblock *sb, struct ext4_inode *inode,
+ u32int checksum)
+{
+ u16int inode_size = ext4_get16(sb, inode_size);
+ inode->osd2.linux2.checksum_lo =
+ to_le16((checksum << 16) >> 16);
+
+ if (inode_size > EXT4_GOOD_OLD_INODE_SIZE)
+ inode->checksum_hi = to_le16(checksum >> 16);
+
+}
+
+u32int ext4_inode_get_access_time(struct ext4_inode *inode)
+{
+ return to_le32(inode->access_time);
+}
+void ext4_inode_set_access_time(struct ext4_inode *inode, u32int time)
+{
+ inode->access_time = to_le32(time);
+}
+
+u32int ext4_inode_get_change_inode_time(struct ext4_inode *inode)
+{
+ return to_le32(inode->change_inode_time);
+}
+void ext4_inode_set_change_inode_time(struct ext4_inode *inode, u32int time)
+{
+ inode->change_inode_time = to_le32(time);
+}
+
+u32int ext4_inode_get_modif_time(struct ext4_inode *inode)
+{
+ return to_le32(inode->modification_time);
+}
+
+void ext4_inode_set_modif_time(struct ext4_inode *inode, u32int time)
+{
+ inode->modification_time = to_le32(time);
+}
+
+u32int ext4_inode_get_del_time(struct ext4_inode *inode)
+{
+ return to_le32(inode->deletion_time);
+}
+
+void ext4_inode_set_del_time(struct ext4_inode *inode, u32int time)
+{
+ inode->deletion_time = to_le32(time);
+}
+
+u32int ext4_inode_get_creation_time(struct ext4_inode *inode)
+{
+ return to_le32(inode->crtime);
+}
+
+u32int ext4_inode_get_gid(struct ext4_inode *inode)
+{
+ return to_le32(inode->gid);
+}
+void ext4_inode_set_gid(struct ext4_inode *inode, u32int gid)
+{
+ inode->gid = to_le32(gid);
+}
+
+u16int ext4_inode_get_links_cnt(struct ext4_inode *inode)
+{
+ return to_le16(inode->links_count);
+}
+void ext4_inode_set_links_cnt(struct ext4_inode *inode, u16int cnt)
+{
+ inode->links_count = to_le16(cnt);
+}
+
+u64int ext4_inode_get_blocks_count(struct ext4_sblock *sb,
+ struct ext4_inode *inode)
+{
+ u64int cnt = to_le32(inode->blocks_count_lo);
+
+ if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_HUGE_FILE)) {
+
+ /* 48-bit field */
+ cnt |= (u64int)to_le16(inode->osd2.linux2.blocks_high) << 32;
+
+ if (ext4_inode_has_flag(inode, EXT4_INODE_FLAG_HUGE_FILE)) {
+
+ u32int block_count = ext4_sb_get_block_size(sb);
+ u32int b = ext4_inode_block_bits_count(block_count);
+ return cnt << (b - 9);
+ }
+ }
+
+ return cnt;
+}
+
+int ext4_inode_set_blocks_count(struct ext4_sblock *sb,
+ struct ext4_inode *inode, u64int count)
+{
+ /* 32-bit maximum */
+ u64int max = 0;
+ max = ~max >> 32;
+
+ if (count <= max) {
+ inode->blocks_count_lo = to_le32((u32int)count);
+ inode->osd2.linux2.blocks_high = 0;
+ ext4_inode_clear_flag(inode, EXT4_INODE_FLAG_HUGE_FILE);
+
+ return 0;
+ }
+
+ /* Check if there can be used huge files (many blocks) */
+ if (!ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_HUGE_FILE)) {
+ werrstr(Einval);
+ return -1;
+ }
+
+ /* 48-bit maximum */
+ max = 0;
+ max = ~max >> 16;
+
+ if (count <= max) {
+ inode->blocks_count_lo = to_le32((u32int)count);
+ inode->osd2.linux2.blocks_high = to_le16((u16int)(count >> 32));
+ ext4_inode_clear_flag(inode, EXT4_INODE_FLAG_HUGE_FILE);
+ } else {
+ u32int block_count = ext4_sb_get_block_size(sb);
+ u32int block_bits =ext4_inode_block_bits_count(block_count);
+
+ ext4_inode_set_flag(inode, EXT4_INODE_FLAG_HUGE_FILE);
+ count = count >> (block_bits - 9);
+ inode->blocks_count_lo = to_le32((u32int)count);
+ inode->osd2.linux2.blocks_high = to_le16((u16int)(count >> 32));
+ }
+
+ return 0;
+}
+
+u32int ext4_inode_get_flags(struct ext4_inode *inode)
+{
+ return to_le32(inode->flags);
+}
+void ext4_inode_set_flags(struct ext4_inode *inode, u32int flags)
+{
+ inode->flags = to_le32(flags);
+}
+
+u32int ext4_inode_get_generation(struct ext4_inode *inode)
+{
+ return to_le32(inode->generation);
+}
+void ext4_inode_set_generation(struct ext4_inode *inode, u32int gen)
+{
+ inode->generation = to_le32(gen);
+}
+
+u16int ext4_inode_get_extra_isize(struct ext4_sblock *sb,
+ struct ext4_inode *inode)
+{
+ u16int inode_size = ext4_get16(sb, inode_size);
+ if (inode_size > EXT4_GOOD_OLD_INODE_SIZE)
+ return to_le16(inode->extra_isize);
+ else
+ return 0;
+}
+
+void ext4_inode_set_extra_isize(struct ext4_sblock *sb,
+ struct ext4_inode *inode,
+ u16int size)
+{
+ u16int inode_size = ext4_get16(sb, inode_size);
+ if (inode_size > EXT4_GOOD_OLD_INODE_SIZE)
+ inode->extra_isize = to_le16(size);
+}
+
+/* Read the 48-bit ACL block number; on Linux-created filesystems bits 32..47
+ * come from file_acl_high, mirroring ext4_inode_set_file_acl (acl >> 32). */
+u64int ext4_inode_get_file_acl(struct ext4_inode *inode,
+	struct ext4_sblock *sb)
+{
+	u64int v = to_le32(inode->file_acl_lo);
+	if (ext4_get32(sb, creator_os) == EXT4_SUPERBLOCK_OS_LINUX)
+		v |= (u64int)to_le16(inode->osd2.linux2.file_acl_high) << 32;
+	return v;
+}
+
+void ext4_inode_set_file_acl(struct ext4_inode *inode, struct ext4_sblock *sb,
+ u64int acl)
+{
+ inode->file_acl_lo = to_le32((acl << 32) >> 32);
+
+ if (ext4_get32(sb, creator_os) == EXT4_SUPERBLOCK_OS_LINUX)
+ inode->osd2.linux2.file_acl_high = to_le16((u16int)(acl >> 32));
+}
+
+u32int ext4_inode_get_direct_block(struct ext4_inode *inode, u32int idx)
+{
+ return to_le32(inode->blocks[idx]);
+}
+void ext4_inode_set_direct_block(struct ext4_inode *inode, u32int idx,
+ u32int block)
+{
+ inode->blocks[idx] = to_le32(block);
+}
+
+u32int ext4_inode_get_indirect_block(struct ext4_inode *inode, u32int idx)
+{
+ return to_le32(inode->blocks[idx + EXT4_INODE_INDIRECT_BLOCK]);
+}
+
+void ext4_inode_set_indirect_block(struct ext4_inode *inode, u32int idx,
+ u32int block)
+{
+ inode->blocks[idx + EXT4_INODE_INDIRECT_BLOCK] = to_le32(block);
+}
+
+u32int ext4_inode_get_dev(struct ext4_inode *inode)
+{
+ u32int dev_0, dev_1;
+ dev_0 = ext4_inode_get_direct_block(inode, 0);
+ dev_1 = ext4_inode_get_direct_block(inode, 1);
+
+ if (dev_0)
+ return dev_0;
+ else
+ return dev_1;
+}
+
+void ext4_inode_set_dev(struct ext4_inode *inode, u32int dev)
+{
+ if (dev & ~0xFFFF)
+ ext4_inode_set_direct_block(inode, 1, dev);
+ else
+ ext4_inode_set_direct_block(inode, 0, dev);
+}
+
+u32int ext4_inode_type(struct ext4_sblock *sb, struct ext4_inode *inode)
+{
+ return (ext4_inode_get_mode(sb, inode) & EXT4_INODE_MODE_TYPE_MASK);
+}
+
+bool ext4_inode_is_type(struct ext4_sblock *sb, struct ext4_inode *inode,
+ u32int type)
+{
+ return ext4_inode_type(sb, inode) == type;
+}
+
+bool ext4_inode_has_flag(struct ext4_inode *inode, u32int f)
+{
+ return ext4_inode_get_flags(inode) & f;
+}
+
+void ext4_inode_clear_flag(struct ext4_inode *inode, u32int f)
+{
+ u32int flags = ext4_inode_get_flags(inode);
+ flags = flags & (~f);
+ ext4_inode_set_flags(inode, flags);
+}
+
+void ext4_inode_set_flag(struct ext4_inode *inode, u32int f)
+{
+ u32int flags = ext4_inode_get_flags(inode);
+ flags = flags | f;
+ ext4_inode_set_flags(inode, flags);
+}
+
+bool ext4_inode_can_truncate(struct ext4_sblock *sb, struct ext4_inode *inode)
+{
+ if ((ext4_inode_has_flag(inode, EXT4_INODE_FLAG_APPEND)) ||
+ (ext4_inode_has_flag(inode, EXT4_INODE_FLAG_IMMUTABLE)))
+ return false;
+
+ if ((ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_FILE)) ||
+ (ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_DIRECTORY)) ||
+ (ext4_inode_is_type(sb, inode, EXT4_INODE_MODE_SOFTLINK)))
+ return true;
+
+ return false;
+}
+
+struct ext4_extent_header *
+ext4_inode_get_extent_header(struct ext4_inode *inode)
+{
+ return (struct ext4_extent_header *)inode->blocks;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_journal.c
@@ -1,0 +1,2232 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_fs.h"
+#include "ext4_super.h"
+#include "ext4_journal.h"
+#include "ext4_blockdev.h"
+#include "ext4_crc32.h"
+#include "ext4_journal.h"
+
+/**@brief Revoke entry during journal replay.*/
+struct revoke_entry {
+ /**@brief Block number not to be replayed.*/
+ ext4_fsblk_t block;
+
+ /**@brief For any transaction id smaller
+ * than trans_id, records of @block
+ * in those transactions should not
+ * be replayed.*/
+ u32int trans_id;
+
+ /**@brief Revoke tree node.*/
+ RB_ENTRY(revoke_entry) revoke_node;
+};
+
+/**@brief Valid journal replay information.*/
+struct recover_info {
+ /**@brief Starting transaction id.*/
+ u32int start_trans_id;
+
+ /**@brief Ending transaction id.*/
+ u32int last_trans_id;
+
+ /**@brief Used as internal argument.*/
+ u32int this_trans_id;
+
+ /**@brief No of transactions went through.*/
+ u32int trans_cnt;
+
+ /**@brief RB-Tree storing revoke entries.*/
+ RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
+};
+
+/**@brief Journal replay internal arguments.*/
+struct replay_arg {
+ /**@brief Journal replay information.*/
+ struct recover_info *info;
+
+ /**@brief Current block we are on.*/
+ u32int *this_block;
+
+ /**@brief Current trans_id we are on.*/
+ u32int this_trans_id;
+};
+
+/* Make sure we wrap around the log correctly! */
+#define wrap(sb, var) \
+do { \
+ if (var >= jbd_get32((sb), maxlen)) \
+ var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first)); \
+} while (0)
+
+static inline s32int
+trans_id_diff(u32int x, u32int y)
+{
+ s32int diff = x - y;
+ return diff;
+}
+
+static int
+jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
+{
+ if (a->block > b->block)
+ return 1;
+ else if (a->block < b->block)
+ return -1;
+ return 0;
+}
+
+static int
+jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
+{
+ if (a->lba > b->lba)
+ return 1;
+ else if (a->lba < b->lba)
+ return -1;
+ return 0;
+}
+
+static int
+jbd_revoke_rec_cmp(struct jbd_revoke_rec *a, struct jbd_revoke_rec *b)
+{
+ if (a->lba > b->lba)
+ return 1;
+ else if (a->lba < b->lba)
+ return -1;
+ return 0;
+}
+
+RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
+ jbd_revoke_entry_cmp, static inline)
+RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
+ jbd_block_rec_cmp, static inline)
+RB_GENERATE_INTERNAL(jbd_revoke_tree, jbd_revoke_rec, revoke_node,
+ jbd_revoke_rec_cmp, static inline)
+
+#define jbd_alloc_revoke_entry() ext4_calloc(1, sizeof(struct revoke_entry))
+#define jbd_free_revoke_entry(addr) ext4_free(addr)
+
+static int jbd_has_csum(struct jbd_sb *jbd_sb)
+{
+ if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
+ return 2;
+
+ if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
+ return 3;
+
+ return 0;
+}
+
+static u32int jbd_sb_csum(struct jbd_sb *jbd_sb)
+{
+	u32int checksum = 0;
+
+	if (jbd_has_csum(jbd_sb)) {
+		u32int orig_checksum = jbd_sb->checksum;
+		jbd_set32(jbd_sb, checksum, 0);
+		/* crc32c over the whole superblock, taken with its checksum field zeroed */
+		checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb,
+			JBD_SUPERBLOCK_SIZE);
+		jbd_sb->checksum = orig_checksum;
+	}
+	return checksum;
+}
+
+static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
+{
+ if (!jbd_has_csum(jbd_sb))
+ return;
+
+ jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
+}
+
+static bool
+jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
+{
+ if (!jbd_has_csum(jbd_sb))
+ return true;
+
+ return jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);
+}
+
+static u32int jbd_meta_csum(struct jbd_fs *jbd_fs,
+	struct jbd_bhdr *bhdr)
+{
+	u32int checksum = 0;
+
+	if (jbd_has_csum(&jbd_fs->sb)) {
+		u32int block_size = jbd_get32(&jbd_fs->sb, blocksize);
+		struct jbd_block_tail *tail =
+			(struct jbd_block_tail *)((char *)bhdr + block_size -
+			sizeof(struct jbd_block_tail));
+		u32int orig_checksum = tail->checksum;
+		tail->checksum = 0;
+
+		/* First calculate crc32c checksum against the journal superblock uuid */
+		checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
+			sizeof(jbd_fs->sb.uuid));
+		/* Then continue it over the whole block (tail checksum zeroed above) */
+		checksum = ext4_crc32c(checksum, bhdr,
+			block_size);
+		tail->checksum = orig_checksum;
+	}
+	return checksum;
+}
+
+static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
+ struct jbd_bhdr *bhdr)
+{
+ u32int block_size = jbd_get32(&jbd_fs->sb, blocksize);
+ struct jbd_block_tail *tail = (struct jbd_block_tail *)
+ ((char *)bhdr + block_size -
+ sizeof(struct jbd_block_tail));
+ if (!jbd_has_csum(&jbd_fs->sb))
+ return;
+
+ tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
+}
+
+static bool
+jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
+ struct jbd_bhdr *bhdr)
+{
+ u32int block_size = jbd_get32(&jbd_fs->sb, blocksize);
+ struct jbd_block_tail *tail = (struct jbd_block_tail *)
+ ((char *)bhdr + block_size -
+ sizeof(struct jbd_block_tail));
+ if (!jbd_has_csum(&jbd_fs->sb))
+ return true;
+
+ return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
+}
+
+static u32int jbd_commit_csum(struct jbd_fs *jbd_fs,
+	struct jbd_commit_header *header)
+{
+	u32int checksum = 0;
+
+	if (jbd_has_csum(&jbd_fs->sb)) {
+		u8int orig_checksum_type = header->chksum_type,
+			orig_checksum_size = header->chksum_size;
+		u32int orig_checksum = header->chksum[0];
+		u32int block_size = jbd_get32(&jbd_fs->sb, blocksize);
+		header->chksum_type = 0;
+		header->chksum_size = 0;
+		header->chksum[0] = 0;
+
+		/* First calculate crc32c checksum against the journal superblock uuid */
+		checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
+			sizeof(jbd_fs->sb.uuid));
+		/* Then continue it over the whole block (chksum fields zeroed above) */
+		checksum = ext4_crc32c(checksum, header,
+			block_size);
+
+		header->chksum_type = orig_checksum_type;
+		header->chksum_size = orig_checksum_size;
+		header->chksum[0] = orig_checksum;
+	}
+	return checksum;
+}
+
+static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
+	struct jbd_commit_header *header)
+{
+	if (!jbd_has_csum(&jbd_fs->sb))
+		return;
+	/* Stored big-endian: jbd_verify_commit_csum() compares against to_be32() */
+	header->chksum_type = 0;
+	header->chksum_size = 0;
+	header->chksum[0] = to_be32(jbd_commit_csum(jbd_fs, header));
+}
+
+static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
+ struct jbd_commit_header *header)
+{
+ if (!jbd_has_csum(&jbd_fs->sb))
+ return true;
+
+ return header->chksum[0] == to_be32(jbd_commit_csum(jbd_fs,
+ header));
+}
+
+/*
+ * NOTE: @csum is only used (as the crc32 seed) on the JBD_FEATURE_COMPAT_CHECKSUM
+ * path. NOTE(review): that flag is tested with JBD_HAS_INCOMPAT_FEATURE - confirm.
+ */
+static u32int jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
+	u32int csum,
+	u32int sequence)
+{
+	u32int checksum = 0;
+
+	if (jbd_has_csum(&jbd_fs->sb)) {
+		u32int block_size = jbd_get32(&jbd_fs->sb, blocksize);
+		/* First calculate crc32c checksum against fs uuid */
+		checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
+			sizeof(jbd_fs->sb.uuid));
+		/* Then calculate crc32c checksum against sequence no. */
+		checksum = ext4_crc32c(checksum, &sequence,
+			sizeof(u32int));
+		/* Calculate crc32c checksum against the whole block */
+		checksum = ext4_crc32c(checksum, buf,
+			block_size);
+	} else if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+		JBD_FEATURE_COMPAT_CHECKSUM)) {
+		u32int block_size = jbd_get32(&jbd_fs->sb, blocksize);
+		/* Calculate plain crc32 (seeded with @csum) against the whole block */
+		checksum = ext4_crc32(csum, buf,
+			block_size);
+	}
+	return checksum;
+}
+
+static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
+	u32int checksum)
+{
+	int ver = jbd_has_csum(&jbd_fs->sb);
+	if (!ver)
+		return;
+	/* v2 tags keep only the low 16 bits of the checksum, v3 tags all 32 */
+	if (ver == 2) {
+		struct jbd_block_tag *tag = __tag;
+		tag->checksum = to_be16((u16int)checksum);
+	} else {
+		struct jbd_block_tag3 *tag = __tag;
+		tag->checksum = to_be32(checksum);
+	}
+}
+
+/**@brief Write jbd superblock to disk.
+ * @param jbd_fs jbd filesystem
+ * @param s jbd superblock
+ * @return standard error code*/
+static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
+{
+	int rc;
+	struct ext4_fs *fs = jbd_fs->inode_ref.fs;
+	u64int offset;
+	ext4_fsblk_t fblock;
+	rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
+	if (rc != 0)
+		return rc;
+
+	jbd_sb_csum_set(s);
+	offset = fblock * ext4_sb_get_block_size(&fs->sb);
+	/* Same length jbd_sb_csum() checksums, not the ext4 superblock's */
+	return ext4_block_writebytes(fs->bdev, offset, s, JBD_SUPERBLOCK_SIZE);
+}
+
+/**@brief Read jbd superblock from disk.
+ * @param jbd_fs jbd filesystem
+ * @param s jbd superblock
+ * @return standard error code*/
+static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
+{
+	int rc;
+	struct ext4_fs *fs = jbd_fs->inode_ref.fs;
+	u64int offset;
+	ext4_fsblk_t fblock;
+	rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
+	if (rc != 0)
+		return rc;
+
+	offset = fblock * ext4_sb_get_block_size(&fs->sb);
+	/* Same length jbd_sb_csum() checksums, not the ext4 superblock's */
+	return ext4_block_readbytes(fs->bdev, offset, s, JBD_SUPERBLOCK_SIZE);
+}
+
+/**@brief Verify jbd superblock.
+ * @param sb jbd superblock
+ * @return true if jbd superblock is valid */
+static bool jbd_verify_sb(struct jbd_sb *sb)
+{
+ struct jbd_bhdr *header = &sb->header;
+ if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
+ return false;
+
+ if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
+ jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
+ return false;
+
+ return jbd_verify_sb_csum(sb);
+}
+
+/**@brief Write back dirty jbd superblock to disk.
+ * @param jbd_fs jbd filesystem
+ * @return standard error code*/
+static int jbd_write_sb(struct jbd_fs *jbd_fs)
+{
+ int rc = 0;
+ if (jbd_fs->dirty) {
+ rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
+ if (rc != 0)
+ return rc;
+
+ jbd_fs->dirty = false;
+ }
+ return rc;
+}
+
+/**@brief Get reference to jbd filesystem.
+ * @param fs Filesystem to load journal of
+ * @param jbd_fs jbd filesystem
+ * @return 0 on success; -1 (Eio) if the jbd superblock is invalid, else a helper's error*/
+int jbd_get_fs(struct ext4_fs *fs,
+	struct jbd_fs *jbd_fs)
+{
+	int rc;
+	u32int journal_ino;
+
+	memset(jbd_fs, 0, sizeof(struct jbd_fs));
+	/* See if there is journal inode on this filesystem.*/
+	/* FIXME: detection of the existence of a journal bdev is
+	 * missing.*/
+	journal_ino = ext4_get32(&fs->sb, journal_inode_number);
+
+	rc = ext4_fs_get_inode_ref(fs,
+		journal_ino,
+		&jbd_fs->inode_ref);
+	if (rc != 0)
+		return rc;
+
+	rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
+	if (rc != 0)
+		goto Error;
+
+	if (!jbd_verify_sb(&jbd_fs->sb)) {
+		werrstr(Eio);
+		rc = -1;
+		goto Error;
+	}
+
+	if (rc == 0)
+		jbd_fs->bdev = fs->bdev;
+
+	return rc;
+Error:
+	ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
+	memset(jbd_fs, 0, sizeof(struct jbd_fs));
+
+	return rc;
+}
+
+/**@brief Put reference of jbd filesystem.
+ * @param jbd_fs jbd filesystem
+ * @return standard error code*/
+int jbd_put_fs(struct jbd_fs *jbd_fs)
+{
+ int rc;
+ rc = jbd_write_sb(jbd_fs);
+
+ ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
+ return rc;
+}
+
+/**@brief Data block lookup helper.
+ * @param jbd_fs jbd filesystem
+ * @param iblock block index
+ * @param fblock logical block address
+ * @return standard error code*/
+int jbd_inode_bmap(struct jbd_fs *jbd_fs,
+ ext4_lblk_t iblock,
+ ext4_fsblk_t *fblock)
+{
+ int rc = ext4_fs_get_inode_dblk_idx(
+ &jbd_fs->inode_ref,
+ iblock,
+ fblock,
+ false);
+ return rc;
+}
+
+/**@brief jbd block get function (through cache).
+ * @param jbd_fs jbd filesystem
+ * @param block block descriptor
+ * @param fblock jbd logical block address
+ * @return standard error code*/
+static int jbd_block_get(struct jbd_fs *jbd_fs,
+ struct ext4_block *block,
+ ext4_fsblk_t fblock)
+{
+ /* TODO: journal device. */
+ int rc;
+ struct ext4_blockdev *bdev = jbd_fs->bdev;
+ ext4_lblk_t iblock = (ext4_lblk_t)fblock;
+
+ /* Lookup the logical block address of
+ * fblock.*/
+ rc = jbd_inode_bmap(jbd_fs, iblock,
+ &fblock);
+ if (rc != 0)
+ return rc;
+
+ rc = ext4_block_get(bdev, block, fblock);
+
+ /* If succeeded, mark buffer as BC_FLUSH to indicate
+ * that data should be written to disk immediately.*/
+ if (rc == 0) {
+ ext4_bcache_set_flag(block->buf, BC_FLUSH);
+ /* As we don't want to occupy too much space
+ * in block cache, we set this buffer BC_TMP.*/
+ ext4_bcache_set_flag(block->buf, BC_TMP);
+ }
+
+ return rc;
+}
+
+/**@brief jbd block get function (through cache, don't read).
+ * @param jbd_fs jbd filesystem
+ * @param block block descriptor
+ * @param fblock jbd logical block address
+ * @return standard error code*/
+static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
+ struct ext4_block *block,
+ ext4_fsblk_t fblock)
+{
+ /* TODO: journal device. */
+ int rc;
+ struct ext4_blockdev *bdev = jbd_fs->bdev;
+ ext4_lblk_t iblock = (ext4_lblk_t)fblock;
+ rc = jbd_inode_bmap(jbd_fs, iblock,
+ &fblock);
+ if (rc != 0)
+ return rc;
+
+ rc = ext4_block_get_noread(bdev, block, fblock);
+ if (rc == 0)
+ ext4_bcache_set_flag(block->buf, BC_FLUSH);
+
+ return rc;
+}
+
+/**@brief jbd block set procedure (through cache).
+ * @param jbd_fs jbd filesystem
+ * @param block block descriptor
+ * @return standard error code*/
+static int jbd_block_set(struct jbd_fs *jbd_fs,
+ struct ext4_block *block)
+{
+ struct ext4_blockdev *bdev = jbd_fs->bdev;
+ return ext4_block_set(bdev, block);
+}
+
+/**@brief helper functions to calculate
+ * block tag size, not including UUID part.
+ * @param jbd_fs jbd filesystem
+ * @return tag size in bytes*/
+static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
+{
+ int size;
+
+ /* It is very easy to deal with the case which
+ * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
+ if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+ JBD_FEATURE_INCOMPAT_CSUM_V3))
+ return sizeof(struct jbd_block_tag3);
+
+ size = sizeof(struct jbd_block_tag);
+
+ /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
+ * add 2 bytes to size.*/
+ if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+ JBD_FEATURE_INCOMPAT_CSUM_V2))
+ size += sizeof(u16int);
+
+ if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+ JBD_FEATURE_INCOMPAT_64BIT))
+ return size;
+
+ /* If block number is 4 bytes in size,
+ * minus 4 bytes from size */
+ return size - sizeof(u32int);
+}
+
+/**@brief Tag information. */
+struct tag_info {
+	/**@brief Tag size in bytes, including UUID part.*/
+	int tag_bytes;
+
+	/**@brief Block number stored in this tag.*/
+	ext4_fsblk_t block;
+
+	/**@brief Are the first 4 bytes of the block equal to
+	 * JBD_MAGIC_NUMBER? */
+	bool is_escape;
+
+	/**@brief Whether the UUID part exists or not.*/
+	bool uuid_exist;
+
+	/**@brief UUID content if the UUID part exists.*/
+	u8int uuid[UUID_SIZE];
+
+	/**@brief Is this the last tag? */
+	bool last_tag;
+
+	/**@brief crc32c checksum. */
+	u32int checksum;
+};
+
+/**@brief Extract information from a block tag.
+ * @param jbd_fs jbd filesystem (its feature flags select the tag layout)
+ * @param __tag pointer to the block tag
+ * @param tag_bytes block tag size of this jbd filesystem
+ * @param remain_buf_size bytes left in the buffer, counted from __tag
+ * @param tag_info receives the information of this tag;
+ * its checksum member is not filled in here
+ * @return 0 when succeed, otherwise -1 with the error string set to Einval.*/
+static int
+jbd_extract_block_tag(struct jbd_fs *jbd_fs,
+ void *__tag,
+ int tag_bytes,
+ s32int remain_buf_size,
+ struct tag_info *tag_info)
+{
+ char *uuid_start;
+ tag_info->tag_bytes = tag_bytes;
+ tag_info->uuid_exist = false;
+ tag_info->last_tag = false;
+ tag_info->is_escape = false;
+
+ /* See whether it is possible to hold a valid block tag.*/
+ if (remain_buf_size - tag_bytes < 0) {
+ werrstr(Einval);
+ return -1;
+ }
+
+ if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+ JBD_FEATURE_INCOMPAT_CSUM_V3)) {
+ struct jbd_block_tag3 *tag = __tag;
+ tag_info->block = jbd_get32(tag, blocknr);
+ if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+ JBD_FEATURE_INCOMPAT_64BIT))
+ tag_info->block |=
+ (u64int)jbd_get32(tag, blocknr_high) << 32;
+
+ if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
+ tag_info->is_escape = true;
+
+ if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
+ /* See whether it is possible to hold UUID part.*/
+ if (remain_buf_size - tag_bytes < UUID_SIZE) {
+ werrstr(Einval);
+ return -1;
+ }
+
+ uuid_start = (char *)tag + tag_bytes; /* the UUID directly follows the tag */
+ tag_info->uuid_exist = true;
+ tag_info->tag_bytes += UUID_SIZE;
+ memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
+ }
+
+ if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
+ tag_info->last_tag = true;
+ } else {
+ struct jbd_block_tag *tag = __tag; /* this layout has 16-bit flags */
+ tag_info->block = jbd_get32(tag, blocknr);
+ if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+ JBD_FEATURE_INCOMPAT_64BIT))
+ tag_info->block |=
+ (u64int)jbd_get32(tag, blocknr_high) << 32;
+
+ if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
+ tag_info->is_escape = true;
+
+ if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
+ /* See whether it is possible to hold UUID part.*/
+ if (remain_buf_size - tag_bytes < UUID_SIZE) {
+ werrstr(Einval);
+ return -1;
+ }
+
+ uuid_start = (char *)tag + tag_bytes; /* the UUID directly follows the tag */
+ tag_info->uuid_exist = true;
+ tag_info->tag_bytes += UUID_SIZE;
+ memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
+ }
+
+ if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
+ tag_info->last_tag = true;
+ }
+ return 0;
+}
+
+/**@brief Write information to a block tag.
+ * @param jbd_fs jbd filesystem (its feature flags select the tag layout)
+ * @param __tag pointer to the block tag
+ * @param remain_buf_size bytes left in the buffer, counted from __tag
+ * @param tag_info information of this tag; tag_bytes is updated on return
+ * @return 0 when succeed, otherwise -1 with the error string set to Einval.*/
+static int
+jbd_write_block_tag(struct jbd_fs *jbd_fs,
+		    void *__tag,
+		    s32int remain_buf_size,
+		    struct tag_info *tag_info)
+{
+	char *uuid_start;
+	int tag_bytes = jbd_tag_bytes(jbd_fs);
+
+	tag_info->tag_bytes = tag_bytes;
+
+	/* See whether it is possible to hold a valid block tag.*/
+	if (remain_buf_size - tag_bytes < 0) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+				     JBD_FEATURE_INCOMPAT_CSUM_V3)) {
+		struct jbd_block_tag3 *tag = __tag;
+		memset(tag, 0, sizeof(struct jbd_block_tag3));
+		jbd_set32(tag, blocknr, (u32int)tag_info->block);
+		if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+					     JBD_FEATURE_INCOMPAT_64BIT))
+			jbd_set32(tag, blocknr_high, tag_info->block >> 32);
+
+		if (tag_info->uuid_exist) {
+			/* See whether it is possible to hold UUID part.*/
+			if (remain_buf_size - tag_bytes < UUID_SIZE) {
+				werrstr(Einval);
+				return -1;
+			}
+
+			uuid_start = (char *)tag + tag_bytes;
+			tag_info->tag_bytes += UUID_SIZE;
+			memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
+		} else
+			jbd_set32(tag, flags,
+				  jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
+
+		jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
+
+		if (tag_info->last_tag)
+			jbd_set32(tag, flags,
+				  jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
+
+		if (tag_info->is_escape)
+			jbd_set32(tag, flags,
+				  jbd_get32(tag, flags) | JBD_FLAG_ESCAPE);
+	} else {
+		struct jbd_block_tag *tag = __tag;
+		/* Clear only the bounds-checked tag_bytes: the on-disk tag can
+		 * be shorter than the struct (no 64BIT), see jbd_tag_bytes(). */
+		memset(tag, 0, tag_bytes);
+		jbd_set32(tag, blocknr, (u32int)tag_info->block);
+		if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+					     JBD_FEATURE_INCOMPAT_64BIT))
+			jbd_set32(tag, blocknr_high, tag_info->block >> 32);
+
+		if (tag_info->uuid_exist) {
+			/* See whether it is possible to hold UUID part.*/
+			if (remain_buf_size - tag_bytes < UUID_SIZE) {
+				werrstr(Einval);
+				return -1;
+			}
+
+			uuid_start = (char *)tag + tag_bytes;
+			tag_info->tag_bytes += UUID_SIZE;
+			memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
+		} else
+			jbd_set16(tag, flags,
+				  jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
+
+		jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
+
+		if (tag_info->last_tag)
+			jbd_set16(tag, flags,
+				  jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
+
+		if (tag_info->is_escape)
+			jbd_set16(tag, flags,
+				  jbd_get16(tag, flags) | JBD_FLAG_ESCAPE);
+	}
+	return 0;
+}
+
+/**@brief Iterate all block tags in a block.
+ * @param jbd_fs jbd filesystem
+ * @param __tag_start pointer to the first block tag
+ * @param tag_tbl_size number of bytes available from __tag_start
+ * @param func callback invoked for every block tag found
+ *        (skipped when nil)
+ * @param arg additional argument to be passed to func */
+static void
+jbd_iterate_block_table(struct jbd_fs *jbd_fs,
+			void *__tag_start,
+			s32int tag_tbl_size,
+			void (*func)(struct jbd_fs * jbd_fs,
+				     struct tag_info *tag_info,
+				     void *arg),
+			void *arg)
+{
+	struct tag_info tag_info;
+	char *cursor = __tag_start;
+	int tag_bytes = jbd_tag_bytes(jbd_fs);
+
+	/* With checksums enabled a struct jbd_block_tail is cut
+	 * off the size of the table before walking it. */
+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+				     JBD_FEATURE_INCOMPAT_CSUM_V2) ||
+	    JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+				     JBD_FEATURE_INCOMPAT_CSUM_V3))
+		tag_tbl_size -= sizeof(struct jbd_block_tail);
+
+	for (;;) {
+		if (!tag_tbl_size)
+			return;
+
+		/* A tag that does not fit ends the walk. */
+		if (jbd_extract_block_tag(jbd_fs, cursor, tag_bytes,
+					  tag_tbl_size, &tag_info) != 0)
+			return;
+
+		if (func)
+			func(jbd_fs, &tag_info, arg);
+
+		/* Stop the iteration when we reach the last tag. */
+		if (tag_info.last_tag)
+			return;
+
+		/* tag_info.tag_bytes includes the UUID part, if any. */
+		cursor += tag_info.tag_bytes;
+		tag_tbl_size -= tag_info.tag_bytes;
+	}
+}
+
+ static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
+				   struct tag_info *tag_info,
+				   void *arg)
+{
+	u32int *cursor = arg;	/* caller's current journal block number */
+
+	USED(tag_info);
+	(void)jbd_fs;
+	ext4_dbg(DEBUG_JBD, "Block in block_tag: %llud\n", tag_info->block);
+	*cursor += 1;
+	wrap(&jbd_fs->sb, *cursor);
+}
+
+static struct revoke_entry *
+jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
+{
+	/* Throw-away key holding the block number to look up
+	 * in the revoke tree. */
+	struct revoke_entry key = { .block = block };
+
+	return RB_FIND(jbd_revoke, &info->revoke_root, &key);
+}
+
+/**@brief Replay a block in a transaction.
+ * @param jbd_fs jbd filesystem
+ * @param tag_info tag_info of the logged block.
+ * @param __arg struct replay_arg holding the replay state.*/
+ static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
+				  struct tag_info *tag_info,
+				  void *__arg)
+{
+	int r;
+	struct replay_arg *arg = __arg;
+	struct recover_info *info = arg->info;
+	u32int *this_block = arg->this_block;
+	struct revoke_entry *revoke_entry;
+	struct ext4_block journal_block, ext4_block;
+	struct ext4_fs *fs = jbd_fs->inode_ref.fs;
+
+	(*this_block)++;
+	wrap(&jbd_fs->sb, *this_block);
+
+	/* We replay this block only if there is no revoke entry for it,
+	 * or the current transaction id is newer than the one it holds.*/
+	revoke_entry = jbd_revoke_entry_lookup(info, tag_info->block);
+	if (revoke_entry &&
+	    trans_id_diff(arg->this_trans_id, revoke_entry->trans_id) <= 0)
+		return;
+
+	ext4_dbg(DEBUG_JBD,
+		 "Replaying block in block_tag: %llud\n",
+		 tag_info->block);
+
+	r = jbd_block_get(jbd_fs, &journal_block, *this_block);
+	if (r != 0)
+		return;
+
+	/* Block 0 carries the ext4 superblock and is treated specially. */
+	if (tag_info->block) {
+		r = ext4_block_get_noread(fs->bdev, &ext4_block, tag_info->block);
+		if (r != 0) {
+			jbd_block_set(jbd_fs, &journal_block);
+			return;
+		}
+
+		memcpy(ext4_block.data,
+		       journal_block.data,
+		       jbd_get32(&jbd_fs->sb, blocksize));
+
+		if (tag_info->is_escape)
+			((struct jbd_bhdr *)ext4_block.data)->magic =
+				to_be32(JBD_MAGIC_NUMBER);
+
+		ext4_bcache_set_dirty(ext4_block.buf);
+		ext4_block_set(fs->bdev, &ext4_block);
+	} else {
+		u16int mount_count, state;
+		mount_count = ext4_get16(&fs->sb, mount_count);
+		state = ext4_get16(&fs->sb, state);
+
+		memcpy(&fs->sb,
+		       journal_block.data + EXT4_SUPERBLOCK_OFFSET,
+		       EXT4_SUPERBLOCK_SIZE);
+
+		/* Put back the state value the in-memory superblock
+		 * had before it was overwritten by the logged copy. */
+		ext4_set16(&fs->sb, state, state);
+		r = ext4_sb_write(fs->bdev, &fs->sb);
+		/* Restore the mount count only after a successful write,
+		 * but never return from here: journal_block must still
+		 * be released below. */
+		if (r == 0)
+			ext4_set16(&fs->sb, mount_count, mount_count);
+	}
+
+	jbd_block_set(jbd_fs, &journal_block);
+}
+
+/**@brief Record a revoked block address in the revoke tree,
+ * together with the id of the current transaction.
+ * @param info journal replay info
+ * @param block block address being revoked.*/
+ static void jbd_add_revoke_block_tags(struct recover_info *info,
+				      ext4_fsblk_t block)
+{
+	struct revoke_entry *entry;
+
+	ext4_dbg(DEBUG_JBD, "Add block %llud to revoke tree\n", block);
+
+	entry = jbd_revoke_entry_lookup(info, block);
+	if (entry == nil) {
+		/* First revoke seen for this block: create its entry. */
+		entry = jbd_alloc_revoke_entry();
+		assert(entry);
+		entry->block = block;
+		entry->trans_id = info->this_trans_id;
+		RB_INSERT(jbd_revoke, &info->revoke_root, entry);
+		return;
+	}
+
+	/* The block is already in the tree; just refresh the
+	 * transaction id stored with it. */
+	entry->trans_id = info->this_trans_id;
+}
+
+ static void jbd_destroy_revoke_tree(struct recover_info *info)
+{
+	struct revoke_entry *victim;	/* entry being detached and freed */
+	while (!RB_EMPTY(&info->revoke_root)) {
+		victim = RB_MIN(jbd_revoke, &info->revoke_root);
+		assert(victim);
+		RB_REMOVE(jbd_revoke, &info->revoke_root, victim);
+		jbd_free_revoke_entry(victim);
+	}
+}
+
+
+#define ACTION_SCAN 0 /* count transactions and find the last transaction id */
+#define ACTION_REVOKE 1 /* collect revoke blocks into the revoke tree */
+#define ACTION_RECOVER 2 /* replay the blocks listed in descriptor blocks */
+
+/**@brief Add entries in a revoke block to revoke tree.
+ * @param jbd_fs jbd filesystem
+ * @param header revoke block header
+ * @param info journal replay info*/
+ static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
+				  struct jbd_bhdr *header,
+				  struct recover_info *info)
+{
+	char *blocks_entry;
+	struct jbd_revoke_header *revoke_hdr = (void *)header;
+	u32int i, nr_entries, record_len = 4;
+	u32int used = jbd_get32(revoke_hdr, count);
+
+	/* Records are 8 bytes wide on a 64bit jbd filesystem. */
+	if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
+				     JBD_FEATURE_INCOMPAT_64BIT))
+		record_len = 8;
+
+	/* The count comes from disk (header plus records): reject values
+	 * that would underflow below or walk past the end of the block. */
+	if (used < sizeof(struct jbd_revoke_header) ||
+	    used > jbd_get32(&jbd_fs->sb, blocksize)) {
+		ext4_dbg(DEBUG_JBD, DBG_WARN "Bad revoke block count: %ud\n", used);
+		return;
+	}
+	nr_entries = (used - sizeof(struct jbd_revoke_header)) / record_len;
+	blocks_entry = (char *)(revoke_hdr + 1);
+	for (i = 0;i < nr_entries;i++) {
+		if (record_len == 8)
+			jbd_add_revoke_block_tags(info,
+					to_be64(*(u64int *)blocks_entry));
+		else
+			jbd_add_revoke_block_tags(info,
+					to_be32(*(u32int *)blocks_entry));
+		blocks_entry += record_len;
+	}
+}
+
+ static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
+				       struct jbd_bhdr *header,
+				       u32int *iblock)
+{
+	/* The tag table is what remains of the block after the header. */
+	s32int tbl_size = jbd_get32(&jbd_fs->sb, blocksize) -
+			  sizeof(struct jbd_bhdr);
+
+	jbd_iterate_block_table(jbd_fs, header + 1, tbl_size,
+				jbd_display_block_tags, iblock);
+}
+
+ static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
+					struct jbd_bhdr *header,
+					struct replay_arg *arg)
+{
+	/* The tag table is what remains of the block after the header. */
+	s32int tbl_size = jbd_get32(&jbd_fs->sb, blocksize) -
+			  sizeof(struct jbd_bhdr);
+
+	jbd_iterate_block_table(jbd_fs, header + 1, tbl_size,
+				jbd_replay_block_tags, arg);
+}
+
+/**@brief The core routine of journal replay.
+ * @param jbd_fs jbd filesystem
+ * @param info journal replay info
+ * @param action one of ACTION_SCAN, ACTION_REVOKE, ACTION_RECOVER
+ * @return standard error code*/
+ static int jbd_iterate_log(struct jbd_fs *jbd_fs,
+ struct recover_info *info,
+ int action)
+{
+ int r = 0;
+ bool log_end = false;
+ struct jbd_sb *sb = &jbd_fs->sb;
+ u32int start_trans_id, this_trans_id;
+ u32int start_block, this_block;
+
+ /* We start iterating valid blocks in the whole journal.*/
+ start_trans_id = this_trans_id = jbd_get32(sb, sequence);
+ start_block = this_block = jbd_get32(sb, start);
+ if (action == ACTION_SCAN)
+ info->trans_cnt = 0;
+ else if (!info->trans_cnt)
+ log_end = true; /* trans_cnt is zero: there is nothing to walk */
+
+ ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %ud\n",
+ start_trans_id);
+
+ while (!log_end) {
+ struct ext4_block block;
+ struct jbd_bhdr *header;
+ /* If we are not scanning for the last
+ * valid transaction in the journal,
+ * we will stop when we reach the end of
+ * the journal.*/
+ if (action != ACTION_SCAN)
+ if (trans_id_diff(this_trans_id, info->last_trans_id) > 0) {
+ log_end = true;
+ continue;
+ }
+
+ r = jbd_block_get(jbd_fs, &block, this_block);
+ if (r != 0)
+ break;
+
+ header = (struct jbd_bhdr *)block.data;
+ /* This block does not have a valid magic number,
+ * so we have reached the end of the journal.*/
+ if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
+ jbd_block_set(jbd_fs, &block);
+ log_end = true;
+ continue;
+ }
+
+ /* If the transaction id we found is not expected,
+ * we may have reached the end of the journal.
+ *
+ * If we are not scanning the journal, something
+ * bad might have taken place. :-( */
+ if (jbd_get32(header, sequence) != this_trans_id) {
+ if (action != ACTION_SCAN) {
+ werrstr(Eio);
+ r = -1;
+ }
+
+ jbd_block_set(jbd_fs, &block);
+ log_end = true;
+ continue;
+ }
+
+ switch (jbd_get32(header, blocktype)) {
+ case JBD_DESCRIPTOR_BLOCK:
+ if (!jbd_verify_meta_csum(jbd_fs, header)) {
+ ext4_dbg(DEBUG_JBD,
+ DBG_WARN "Descriptor block checksum failed."
+ "Journal block: %ud\n",
+ this_block);
+ log_end = true;
+ break;
+ }
+ ext4_dbg(DEBUG_JBD, "Descriptor block: %ud, "
+ "trans_id: %ud\n",
+ this_block, this_trans_id);
+ if (action == ACTION_RECOVER) {
+ struct replay_arg replay_arg;
+ replay_arg.info = info;
+ replay_arg.this_block = &this_block; /* advanced once per tag by the callback */
+ replay_arg.this_trans_id = this_trans_id;
+
+ jbd_replay_descriptor_block(jbd_fs,
+ header, &replay_arg);
+ } else
+ jbd_debug_descriptor_block(jbd_fs, /* only steps this_block over the tagged blocks */
+ header, &this_block);
+
+ break;
+ case JBD_COMMIT_BLOCK:
+ if (!jbd_verify_commit_csum(jbd_fs,
+ (struct jbd_commit_header *)header)) {
+ ext4_dbg(DEBUG_JBD,
+ DBG_WARN "Commit block checksum failed."
+ "Journal block: %ud\n",
+ this_block);
+ log_end = true;
+ break;
+ }
+ ext4_dbg(DEBUG_JBD, "Commit block: %ud, "
+ "trans_id: %ud\n",
+ this_block, this_trans_id);
+ /*
+ * This is the end of a transaction,
+ * we may now proceed to the next transaction.
+ */
+ this_trans_id++;
+ if (action == ACTION_SCAN)
+ info->trans_cnt++;
+ break;
+ case JBD_REVOKE_BLOCK:
+ if (!jbd_verify_meta_csum(jbd_fs, header)) {
+ ext4_dbg(DEBUG_JBD,
+ DBG_WARN "Revoke block checksum failed."
+ "Journal block: %ud\n",
+ this_block);
+ log_end = true;
+ break;
+ }
+ ext4_dbg(DEBUG_JBD, "Revoke block: %ud, "
+ "trans_id: %ud\n",
+ this_block, this_trans_id);
+ if (action == ACTION_REVOKE) {
+ info->this_trans_id = this_trans_id;
+ jbd_build_revoke_tree(jbd_fs,
+ header, info);
+ }
+ break;
+ default:
+ log_end = true; /* unknown block type ends the walk */
+ break;
+ }
+ jbd_block_set(jbd_fs, &block);
+ this_block++;
+ wrap(sb, this_block);
+ if (this_block == start_block) /* came all the way round the journal */
+ log_end = true;
+
+ }
+ ext4_dbg(DEBUG_JBD, "End of journal.\n");
+ if (r == 0 && action == ACTION_SCAN) {
+ /* We have finished scanning the journal. */
+ info->start_trans_id = start_trans_id;
+ if (trans_id_diff(this_trans_id, start_trans_id) > 0)
+ info->last_trans_id = this_trans_id - 1; /* this_trans_id was bumped past the last commit */
+ else
+ info->last_trans_id = this_trans_id;
+ }
+
+ return r;
+}
+
+/**@brief Replay journal.
+ * @param jbd_fs jbd filesystem
+ * @return standard error code*/
+ int jbd_recover(struct jbd_fs *jbd_fs)
+{
+	int r;
+	struct recover_info info;
+	struct jbd_sb *sb = &jbd_fs->sb;
+	if (!sb->start)
+		return 0;
+
+	RB_INIT(&info.revoke_root);
+
+	/* Three passes over the log.  A failing pass skips the later
+	 * ones but still falls through to the bottom, so that revoke
+	 * entries collected so far are freed instead of leaked. */
+	r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
+	if (r == 0)
+		r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
+	if (r == 0)
+		r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
+
+	if (r == 0) {
+		/* If we successfully replay the journal,
+		 * clear EXT4_FINCOM_RECOVER flag on the
+		 * ext4 superblock, and set the start of
+		 * journal to 0.*/
+		u32int features_incompatible =
+			ext4_get32(&jbd_fs->inode_ref.fs->sb,
+				   features_incompatible);
+		jbd_set32(&jbd_fs->sb, start, 0);
+		jbd_set32(&jbd_fs->sb, sequence, info.last_trans_id);
+		features_incompatible &= ~EXT4_FINCOM_RECOVER;
+		ext4_set32(&jbd_fs->inode_ref.fs->sb,
+			   features_incompatible,
+			   features_incompatible);
+		jbd_fs->dirty = true;
+		r = ext4_sb_write(jbd_fs->bdev,
+				  &jbd_fs->inode_ref.fs->sb);
+	}
+	jbd_destroy_revoke_tree(&info);
+	return r;
+}
+
+ static void jbd_journal_write_sb(struct jbd_journal *journal)
+{
+	struct jbd_sb *sb = &journal->jbd_fs->sb;
+	jbd_set32(sb, start, journal->start);
+	jbd_set32(sb, sequence, journal->trans_id);
+	journal->jbd_fs->dirty = true;	/* only the in-memory copy is updated here */
+}
+
+/**@brief Start accessing the journal.
+ * @param jbd_fs jbd filesystem
+ * @param journal current journal session
+ * @return standard error code*/
+ int jbd_journal_start(struct jbd_fs *jbd_fs,
+		      struct jbd_journal *journal)
+{
+	int rc;
+	u32int incompat;
+
+	/* Set EXT4_FINCOM_RECOVER in the ext4 superblock and
+	 * write it out before touching the journal state. */
+	incompat = ext4_get32(&jbd_fs->inode_ref.fs->sb,
+			      features_incompatible);
+	incompat |= EXT4_FINCOM_RECOVER;
+	ext4_set32(&jbd_fs->inode_ref.fs->sb, features_incompatible, incompat);
+	rc = ext4_sb_write(jbd_fs->bdev, &jbd_fs->inode_ref.fs->sb);
+	if (rc != 0)
+		return rc;
+
+	/* A fresh session: start and last both sit on the first block. */
+	journal->jbd_fs = jbd_fs;
+	journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
+	journal->first = jbd_get32(&jbd_fs->sb, first);
+	journal->start = journal->first;
+	journal->last = journal->first;
+
+	/* To invalidate any stale records we need to start from
+	 * the checkpoint transaction ID of the previous journalling
+	 * session plus 1. */
+	journal->trans_id = jbd_get32(&jbd_fs->sb, sequence) + 1;
+	journal->alloc_trans_id = journal->trans_id;
+
+	TAILQ_INIT(&journal->cp_queue);
+	RB_INIT(&journal->block_rec_root);
+
+	/* Update start/sequence in the journal superblock and call
+	 * jbd_write_sb(); attach the journal to the device on success. */
+	jbd_journal_write_sb(journal);
+	rc = jbd_write_sb(jbd_fs);
+	if (rc == 0)
+		jbd_fs->bdev->journal = journal;
+	return rc;
+}
+
+static void jbd_trans_end_write(struct ext4_bcache *bc,
+ struct ext4_buf *buf,
+ int res,
+ void *arg);
+
+/* Write every buffer queued on a transaction to its final location.
+ * This routine is only suitable to committed transactions. */
+ static void jbd_journal_flush_trans(struct jbd_trans *trans)
+{
+ struct jbd_buf *jbd_buf, *tmp;
+ struct jbd_journal *journal = trans->journal;
+ struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
+ void *tmp_data = ext4_malloc(journal->block_size); /* bounce buffer for one journal block */
+ assert(tmp_data);
+
+ TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
+ struct ext4_buf *buf;
+ struct ext4_block block;
+ /* The buffer is not yet flushed. */
+ buf = ext4_bcache_find_get(fs->bdev->bc, &block,
+ jbd_buf->block_rec->lba);
+ if (!(buf && ext4_bcache_test_flag(buf, BC_UPTODATE) &&
+ jbd_buf->block_rec->trans == trans)) {
+ int r; /* no cached, up-to-date copy owned by trans: use the journal copy */
+ struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
+ r = jbd_block_get(journal->jbd_fs,
+ &jbd_block,
+ jbd_buf->jbd_lba);
+ assert(r == 0);
+ assert(jbd_block.data != nil);
+ memcpy(tmp_data, jbd_block.data,
+ journal->block_size);
+ ext4_block_set(fs->bdev, &jbd_block); /* NOTE(review): not jbd_block_set(); presumably the same device - confirm */
+ r = ext4_blocks_set_direct(fs->bdev, tmp_data,
+ jbd_buf->block_rec->lba, 1);
+ jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf); /* buf may be nil here */
+ } else
+ ext4_block_flush_buf(fs->bdev, buf); /* the cached copy is current: flush it */
+
+ if (buf)
+ ext4_block_set(fs->bdev, &block);
+ }
+
+ ext4_free(tmp_data);
+}
+
+static void
+jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
+			     struct jbd_trans *trans)
+{
+	struct jbd_sb *sb = &journal->jbd_fs->sb;
+	journal->trans_id = trans->trans_id + 1;
+	journal->start = trans->start_iblock + trans->alloc_blocks;
+	wrap(sb, journal->start);
+	/* trans is freed by this call; every read from it happens above. */
+	jbd_journal_free_trans(journal, trans, false);
+	jbd_journal_write_sb(journal);
+}
+
+void
+jbd_journal_purge_cp_trans(struct jbd_journal *journal,
+			   bool flush,
+			   bool once)
+{
+	struct jbd_trans *trans;
+
+	/* Work on the head of the checkpoint queue until the queue is
+	 * empty, or after a single round when once is set. */
+	for (;;) {
+		trans = TAILQ_FIRST(&journal->cp_queue);
+		if (trans == nil)
+			break;
+
+		if (!trans->data_cnt) {
+			/* No data buffers at all (data_cnt is zero). */
+			TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
+			jbd_journal_skip_pure_revoke(journal, trans);
+		} else if (trans->data_cnt == trans->written_cnt) {
+			/* Every buffer has been written: move the journal
+			 * start past the transaction and drop it. */
+			journal->start = trans->start_iblock +
+					 trans->alloc_blocks;
+			wrap(&journal->jbd_fs->sb, journal->start);
+			journal->trans_id = trans->trans_id + 1;
+			TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
+			jbd_journal_free_trans(journal, trans, false);
+			jbd_journal_write_sb(journal);
+		} else if (flush) {
+			/* Buffers are still outstanding: push them out. */
+			jbd_journal_flush_trans(trans);
+		} else {
+			/* Not allowed to flush: the log now starts at
+			 * this transaction, and we stop here. */
+			journal->start = trans->start_iblock;
+			wrap(&journal->jbd_fs->sb, journal->start);
+			journal->trans_id = trans->trans_id;
+			jbd_journal_write_sb(journal);
+			break;
+		}
+
+		if (once)
+			break;
+	}
+}
+
+/**@brief Stop accessing the journal.
+ * @param journal current journal session
+ * @return standard error code*/
+ int jbd_journal_stop(struct jbd_journal *journal)
+{
+	int rc;
+	u32int incompat;
+	struct jbd_fs *jbd_fs = journal->jbd_fs;
+
+	/* Checkpoint everything that is still queued, flushing
+	 * as needed, so that journalled content reaches the disk. */
+	jbd_journal_purge_cp_trans(journal, true, false);
+
+	/* No block record is expected to be left at this point;
+	 * complain if one is. */
+	if (!RB_EMPTY(&journal->block_rec_root))
+		ext4_dbg(DEBUG_JBD,
+			 DBG_WARN "There are still block records "
+			 "in this journal session!\n");
+
+	/* Clear EXT4_FINCOM_RECOVER and write the ext4
+	 * superblock. */
+	incompat = ext4_get32(&jbd_fs->inode_ref.fs->sb,
+			      features_incompatible);
+	incompat &= ~EXT4_FINCOM_RECOVER;
+	ext4_set32(&jbd_fs->inode_ref.fs->sb,
+		   features_incompatible, incompat);
+	rc = ext4_sb_write(jbd_fs->bdev, &jbd_fs->inode_ref.fs->sb);
+	if (rc != 0)
+		return rc;
+
+	/* Zero start and sequence in the journal superblock. */
+	journal->start = 0;
+	journal->trans_id = 0;
+	jbd_journal_write_sb(journal);
+	return jbd_write_sb(jbd_fs);
+}
+
+/**@brief Allocate a block in the journal.
+ * @param journal current journal session
+ * @param trans transaction
+ * @return allocated block address*/
+ static u32int jbd_journal_alloc_block(struct jbd_journal *journal,
+				      struct jbd_trans *trans)
+{
+	u32int allocated = journal->last;	/* the block handed out */
+
+	journal->last++;
+	wrap(&journal->jbd_fs->sb, journal->last);
+	trans->alloc_blocks++;
+
+	/* last caught up with start: no space is left, so
+	 * checkpoint (flushing if needed) a single transaction. */
+	if (journal->last == journal->start) {
+		jbd_journal_purge_cp_trans(journal, true, true);
+		assert(journal->last != journal->start);
+	}
+
+	return allocated;
+}
+
+static struct jbd_block_rec *
+jbd_trans_block_rec_lookup(struct jbd_journal *journal,
+			   ext4_fsblk_t lba)
+{
+	/* Throw-away key carrying the address to search for
+	 * in the journal's block record tree. */
+	struct jbd_block_rec key = {
+		.lba = lba
+	};
+
+	return RB_FIND(jbd_block, &journal->block_rec_root, &key);
+}
+
+static void
+jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
+			   struct jbd_trans *new_trans)
+{
+	LIST_REMOVE(block_rec, tbrec_node);	/* leave the list it is on */
+	block_rec->trans = new_trans;
+	if (new_trans != nil) {
+		/* Now this block record belongs to this transaction. */
+		LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
+	}
+}
+
+static inline struct jbd_block_rec *
+jbd_trans_insert_block_rec(struct jbd_trans *trans,
+			   ext4_fsblk_t lba)
+{
+	struct jbd_block_rec *rec;
+	rec = jbd_trans_block_rec_lookup(trans->journal, lba);
+	if (rec == nil) {
+		/* Unknown block: create a record owned by trans. */
+		rec = ext4_calloc(1, sizeof(struct jbd_block_rec));
+		if (rec == nil)
+			return nil;
+		rec->lba = lba;
+		rec->trans = trans;
+		TAILQ_INIT(&rec->dirty_buf_queue);
+		LIST_INSERT_HEAD(&trans->tbrec_list, rec, tbrec_node);
+		RB_INSERT(jbd_block, &trans->journal->block_rec_root, rec);
+	} else {
+		jbd_trans_change_ownership(rec, trans);	/* known block: take it over */
+	}
+	return rec;
+}
+
+/* Finish block_rec on behalf of trans; does nothing unless trans owns it.
+ * !abort: complete every jbd_buf still queued on the record.  abort: hand the
+ * record to the owner of the last queued jbd_buf, restoring its data unless revoke. */
+static void
+jbd_trans_finish_callback(struct jbd_journal *journal,
+ const struct jbd_trans *trans,
+ struct jbd_block_rec *block_rec,
+ bool abort,
+ bool revoke)
+{
+ struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
+ if (block_rec->trans != trans)
+ return;
+
+ if (!abort) {
+ struct jbd_buf *jbd_buf, *tmp;
+ TAILQ_FOREACH_SAFE(jbd_buf,
+ &block_rec->dirty_buf_queue,
+ dirty_buf_node,
+ tmp) {
+ jbd_trans_end_write(fs->bdev->bc, nil, 0, jbd_buf); /* called with a nil buf and result 0 */
+ }
+ } else {
+ /*
+ * We have to roll back data if the block is going to be
+ * aborted.
+ */
+ struct jbd_buf *jbd_buf;
+ struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
+ block = EXT4_BLOCK_ZERO();
+ jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
+ jbd_buf_dirty);
+ if (jbd_buf) {
+ if (!revoke) {
+ int r;
+ r = ext4_block_get_noread(fs->bdev,
+ &block,
+ block_rec->lba);
+ assert(r == 0);
+ r = jbd_block_get(journal->jbd_fs,
+ &jbd_block,
+ jbd_buf->jbd_lba);
+ assert(r == 0);
+ memcpy(block.data, jbd_block.data, /* restore the copy logged at jbd_buf->jbd_lba */
+ journal->block_size);
+
+ jbd_trans_change_ownership(block_rec,
+ jbd_buf->trans);
+
+ block.buf->end_write = jbd_trans_end_write; /* the write is reported against jbd_buf */
+ block.buf->end_write_arg = jbd_buf;
+
+ ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
+ ext4_bcache_set_dirty(block.buf);
+
+ ext4_block_set(fs->bdev, &jbd_block);
+ ext4_block_set(fs->bdev, &block);
+ return;
+ } else {
+ /* The revoked buffer is yet written. */
+ jbd_trans_change_ownership(block_rec,
+ jbd_buf->trans);
+ }
+ }
+ }
+}
+
+static inline void
+jbd_trans_remove_block_rec(struct jbd_journal *journal,
+			   struct jbd_block_rec *block_rec,
+			   struct jbd_trans *trans)
+{
+	/* Only the transaction that owns the record may drop it. */
+	if (block_rec->trans != trans)
+		return;
+
+	/* Unlink it from the list and from the journal-wide tree
+	 * before releasing the memory. */
+	LIST_REMOVE(block_rec, tbrec_node);
+	RB_REMOVE(jbd_block, &journal->block_rec_root, block_rec);
+	ext4_free(block_rec);
+}
+
+/**@brief Add block to a transaction and mark it dirty.
+ * @param trans transaction
+ * @param block block descriptor
+ * @return 0 on success, -1 with the error string set to Enomem otherwise*/
+ int jbd_trans_set_block_dirty(struct jbd_trans *trans,
+ struct ext4_block *block)
+{
+ struct jbd_buf *jbd_buf;
+ struct jbd_revoke_rec *rec, tmp_rec = {
+ .lba = block->lb_id
+ };
+ struct jbd_block_rec *block_rec;
+
+ if (block->buf->end_write == jbd_trans_end_write) { /* the buffer already has a jbd_buf attached */
+ jbd_buf = block->buf->end_write_arg;
+ if (jbd_buf && jbd_buf->trans == trans)
+ return 0; /* and it belongs to this transaction: nothing to add */
+ }
+ jbd_buf = ext4_calloc(1, sizeof(struct jbd_buf));
+ if (!jbd_buf) {
+ werrstr(Enomem);
+ return -1;
+ }
+
+ if ((block_rec = jbd_trans_insert_block_rec(trans,
+ block->lb_id)) == nil) {
+ ext4_free(jbd_buf);
+ werrstr(Enomem);
+ return -1;
+ }
+
+ TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
+ jbd_buf,
+ dirty_buf_node);
+
+ jbd_buf->block_rec = block_rec;
+ jbd_buf->trans = trans;
+ jbd_buf->block = *block; /* copy of the descriptor, paired with the extra reference below */
+ ext4_bcache_inc_ref(block->buf);
+
+ /* If the content reach the disk, notify us
+ * so that we may do a checkpoint. */
+ block->buf->end_write = jbd_trans_end_write;
+ block->buf->end_write_arg = jbd_buf;
+
+ trans->data_cnt++;
+ TAILQ_INSERT_HEAD(&trans->buf_queue, jbd_buf, buf_node);
+
+ ext4_bcache_set_dirty(block->buf);
+ rec = RB_FIND(jbd_revoke_tree, /* a block that is written again is no longer revoked in trans */
+ &trans->revoke_root,
+ &tmp_rec);
+ if (rec) {
+ RB_REMOVE(jbd_revoke_tree, &trans->revoke_root,
+ rec);
+ ext4_free(rec);
+ }
+
+ return 0;
+}
+
+/**@brief Add block to be revoked to a transaction
+ * @param trans transaction
+ * @param lba logical block address
+ * @return standard error code*/
+ int jbd_trans_revoke_block(struct jbd_trans *trans,
+			   ext4_fsblk_t lba)
+{
+	struct jbd_revoke_rec *rec;
+	struct jbd_revoke_rec key = {
+		.lba = lba
+	};
+
+	/* A block already recorded as revoked needs no second record. */
+	if (RB_FIND(jbd_revoke_tree, &trans->revoke_root, &key) != nil)
+		return 0;
+
+	rec = ext4_calloc(1, sizeof(struct jbd_revoke_rec));
+	if (rec == nil) {
+		werrstr(Enomem);
+		return -1;
+	}
+
+	rec->lba = lba;
+	RB_INSERT(jbd_revoke_tree, &trans->revoke_root, rec);
+	return 0;
+}
+
+/**@brief Try to add block to be revoked to a transaction.
+ * If @lba still remains in a transaction on checkpoint
+ * queue, add @lba as a revoked block to the transaction.
+ * @param trans transaction
+ * @param lba logical block address
+ * @return standard error code; a failure of
+ *         jbd_trans_revoke_block() is passed on*/
+ int jbd_trans_try_revoke_block(struct jbd_trans *trans,
+			       ext4_fsblk_t lba)
+{
+	struct jbd_journal *journal = trans->journal;
+	struct jbd_block_rec *block_rec =
+		jbd_trans_block_rec_lookup(journal, lba);
+
+	/* No block record: nothing needs revoking. */
+	if (block_rec == nil)
+		return 0;
+
+	/* When trans itself owns the record, a revoke is needed only
+	 * if more than one buffer is queued on it (first != last). */
+	if (block_rec->trans == trans &&
+	    TAILQ_FIRST(&block_rec->dirty_buf_queue) ==
+	    TAILQ_LAST(&block_rec->dirty_buf_queue, jbd_buf_dirty))
+		return 0;
+
+	/* Pass allocation failures on to the caller instead of
+	 * silently reporting success. */
+	return jbd_trans_revoke_block(trans, lba);
+}
+
+/**@brief Free a transaction
+ * @param journal current journal session
+ * @param trans transaction
+ * @param abort discard all the modifications on the block?
+ * @note returns nothing; trans itself is freed and must not be used afterwards*/
+ void jbd_journal_free_trans(struct jbd_journal *journal,
+ struct jbd_trans *trans,
+ bool abort)
+{
+ struct jbd_buf *jbd_buf, *tmp;
+ struct jbd_revoke_rec *rec, *tmp2;
+ struct jbd_block_rec *block_rec, *tmp3;
+ struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
+ TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
+ tmp) {
+ block_rec = jbd_buf->block_rec;
+ if (abort) {
+ jbd_buf->block.buf->end_write = nil; /* detach the write callback from the buffer */
+ jbd_buf->block.buf->end_write_arg = nil;
+ ext4_bcache_clear_dirty(jbd_buf->block.buf);
+ ext4_block_set(fs->bdev, &jbd_buf->block);
+ }
+
+ TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
+ jbd_buf,
+ dirty_buf_node);
+ jbd_trans_finish_callback(journal,
+ trans,
+ block_rec,
+ abort,
+ false);
+ TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
+ ext4_free(jbd_buf);
+ }
+ RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root, /* drop all revoke records */
+ tmp2) {
+ RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, rec);
+ ext4_free(rec);
+ }
+ LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node, /* drop block records still owned by trans */
+ tmp3) {
+ jbd_trans_remove_block_rec(journal, block_rec, trans);
+ }
+
+ ext4_free(trans);
+}
+
+/**@brief Write commit block for a transaction
+ * @param trans transaction
+ * @return standard error code*/
+ static int jbd_trans_write_commit_block(struct jbd_trans *trans)
+{
+ int rc;
+ struct ext4_block block;
+ struct jbd_commit_header *header;
+ u32int commit_iblock;
+ struct jbd_journal *journal = trans->journal;
+
+ commit_iblock = jbd_journal_alloc_block(journal, trans);
+ rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
+ if (rc != 0)
+ return rc;
+
+ header = (struct jbd_commit_header *)block.data;
+ jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
+ jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
+ jbd_set32(&header->header, sequence, trans->trans_id);
+ /* NOTE(review): a COMPAT feature bit is tested with the INCOMPAT
+ * macro here; presumably the compat word was meant - confirm. */
+ if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
+ JBD_FEATURE_COMPAT_CHECKSUM)) {
+ header->chksum_type = JBD_CRC32_CHKSUM;
+ header->chksum_size = JBD_CRC32_CHKSUM_SIZE;
+ jbd_set32(header, chksum[0], trans->data_csum);
+ }
+ jbd_commit_csum_set(journal->jbd_fs, header);
+ ext4_bcache_set_dirty(block.buf);
+ ext4_bcache_set_flag(block.buf, BC_TMP);
+ rc = jbd_block_set(journal->jbd_fs, &block);
+ return rc;
+}
+
+/**@brief Write the descriptor blocks and the journal copies of the data
+ * blocks that belong to a transaction.
+ *
+ * Buffers that are no longer dirty are dropped from the transaction.
+ * Every remaining buffer gets a tag in a descriptor block and its contents
+ * are copied into a newly allocated journal block. The running checksum
+ * over the logged blocks is stored in trans->data_csum when the last open
+ * descriptor block is released.
+ * @param journal current journal session
+ * @param trans transaction
+ * @return standard error code*/
+static int jbd_journal_prepare(struct jbd_journal *journal,
+ struct jbd_trans *trans)
+{
+ int rc = 0, i = 0;
+ struct ext4_block desc_block = EXT4_BLOCK_ZERO(),
+ data_block = EXT4_BLOCK_ZERO();
+ s32int tag_tbl_size = 0;
+ /* Journal block of the descriptor being filled; 0 means none is open. */
+ u32int desc_iblock = 0;
+ u32int data_iblock;
+ char *tag_start, *tag_ptr = nil;
+ struct jbd_buf *jbd_buf, *tmp;
+ struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
+ u32int checksum = EXT4_CRC32_INIT;
+ struct jbd_bhdr *bhdr = nil;
+ void *data;
+
+ /* Try to remove any non-dirty buffers from the tail of
+ * buf_queue. */
+ TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
+ jbd_trans_buf, buf_node, tmp) {
+ struct jbd_revoke_rec tmp_rec = {
+ .lba = jbd_buf->block_rec->lba
+ };
+ /* We stop the iteration when we find a dirty buffer. */
+ if (ext4_bcache_test_flag(jbd_buf->block.buf,
+ BC_DIRTY))
+ break;
+
+ TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
+ jbd_buf,
+ dirty_buf_node);
+
+ /* Detach the checkpoint callback before the buffer is released. */
+ jbd_buf->block.buf->end_write = nil;
+ jbd_buf->block.buf->end_write_arg = nil;
+ /* The last argument tells the callback whether this block is
+ * also in the revoke tree of the transaction. */
+ jbd_trans_finish_callback(journal,
+ trans,
+ jbd_buf->block_rec,
+ true,
+ RB_FIND(jbd_revoke_tree,
+ &trans->revoke_root,
+ &tmp_rec) != nil);
+ jbd_trans_remove_block_rec(journal,
+ jbd_buf->block_rec, trans);
+ trans->data_cnt--;
+
+ ext4_block_set(fs->bdev, &jbd_buf->block);
+ TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
+ ext4_free(jbd_buf);
+ }
+
+ /* Walk what is left from the head: clean buffers are dropped the same
+ * way as above, dirty ones are logged. */
+ TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
+ struct tag_info tag_info;
+ bool uuid_exist = false;
+ bool is_escape = false;
+ struct jbd_revoke_rec tmp_rec = {
+ .lba = jbd_buf->block_rec->lba
+ };
+ if (!ext4_bcache_test_flag(jbd_buf->block.buf,
+ BC_DIRTY)) {
+ TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
+ jbd_buf,
+ dirty_buf_node);
+
+ jbd_buf->block.buf->end_write = nil;
+ jbd_buf->block.buf->end_write_arg = nil;
+
+ /* The buffer has not been modified, just release
+ * that jbd_buf. */
+ jbd_trans_finish_callback(journal,
+ trans,
+ jbd_buf->block_rec,
+ true,
+ RB_FIND(jbd_revoke_tree,
+ &trans->revoke_root,
+ &tmp_rec) != nil);
+ jbd_trans_remove_block_rec(journal,
+ jbd_buf->block_rec, trans);
+ trans->data_cnt--;
+
+ ext4_block_set(fs->bdev, &jbd_buf->block);
+ TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
+ ext4_free(jbd_buf);
+ continue;
+ }
+ /* Fold this block into the running checksum of the transaction. */
+ checksum = jbd_block_csum(journal->jbd_fs,
+ jbd_buf->block.data,
+ checksum,
+ trans->trans_id);
+ /* A data block that starts with the journal magic number is
+ * flagged as escaped; its journal copy gets the magic zeroed
+ * further down. */
+ if (((struct jbd_bhdr *)jbd_buf->block.data)->magic ==
+ to_be32(JBD_MAGIC_NUMBER))
+ is_escape = true;
+
+again:
+ /* Open a new descriptor block when none is being filled. */
+ if (!desc_iblock) {
+ desc_iblock = jbd_journal_alloc_block(journal, trans);
+ rc = jbd_block_get_noread(journal->jbd_fs, &desc_block, desc_iblock);
+ if (rc != 0)
+ break;
+
+ bhdr = (struct jbd_bhdr *)desc_block.data;
+ jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
+ jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
+ jbd_set32(bhdr, sequence, trans->trans_id);
+
+ /* Tags start right after the block header. The first tag
+ * written into a descriptor block carries the UUID. */
+ tag_start = (char *)(bhdr + 1);
+ tag_ptr = tag_start;
+ uuid_exist = true;
+ tag_tbl_size = journal->block_size -
+ sizeof(struct jbd_bhdr);
+
+ /* Keep room for the checksum tail at the end of the block. */
+ if (jbd_has_csum(&journal->jbd_fs->sb))
+ tag_tbl_size -= sizeof(struct jbd_block_tail);
+
+ /* The first block allocated for the transaction marks
+ * where it starts in the journal. */
+ if (!trans->start_iblock)
+ trans->start_iblock = desc_iblock;
+
+ ext4_bcache_set_dirty(desc_block.buf);
+ ext4_bcache_set_flag(desc_block.buf, BC_TMP);
+ }
+ tag_info.block = jbd_buf->block.lb_id;
+ tag_info.uuid_exist = uuid_exist;
+ tag_info.is_escape = is_escape;
+ /* i counts the blocks logged so far. */
+ if (i == trans->data_cnt - 1)
+ tag_info.last_tag = true;
+ else
+ tag_info.last_tag = false;
+
+ tag_info.checksum = checksum;
+
+ if (uuid_exist)
+ memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
+ UUID_SIZE);
+
+ rc = jbd_write_block_tag(journal->jbd_fs,
+ tag_ptr,
+ tag_tbl_size,
+ &tag_info);
+ /* Any failure here is handled as "descriptor block full": the
+ * current block is checksummed and released, then the same tag
+ * is retried in a new descriptor block. */
+ if (rc != 0) {
+ jbd_meta_csum_set(journal->jbd_fs, bhdr);
+ desc_iblock = 0;
+ rc = jbd_block_set(journal->jbd_fs, &desc_block);
+ if (rc != 0)
+ break;
+
+ goto again;
+ }
+
+ /* Allocate the journal block that receives the copy of the data. */
+ data_iblock = jbd_journal_alloc_block(journal, trans);
+ rc = jbd_block_get_noread(journal->jbd_fs, &data_block, data_iblock);
+ if (rc != 0) {
+ /* Give up: release the descriptor block without writing it. */
+ desc_iblock = 0;
+ ext4_bcache_clear_dirty(desc_block.buf);
+ jbd_block_set(journal->jbd_fs, &desc_block);
+ break;
+ }
+
+ data = data_block.data;
+ memcpy(data, jbd_buf->block.data,
+ journal->block_size);
+ if (is_escape)
+ ((struct jbd_bhdr *)data)->magic = 0;
+
+ ext4_bcache_set_dirty(data_block.buf);
+ ext4_bcache_set_flag(data_block.buf, BC_TMP);
+ rc = jbd_block_set(journal->jbd_fs, &data_block);
+ if (rc != 0) {
+ desc_iblock = 0;
+ ext4_bcache_clear_dirty(desc_block.buf);
+ jbd_block_set(journal->jbd_fs, &desc_block);
+ break;
+ }
+ /* Remember which journal block holds the copy of this buffer. */
+ jbd_buf->jbd_lba = data_iblock;
+
+ /* Advance past the tag that was just written. */
+ tag_ptr += tag_info.tag_bytes;
+ tag_tbl_size -= tag_info.tag_bytes;
+
+ i++;
+ }
+ /* Finish the descriptor block that is still open, if any. */
+ if (rc == 0 && desc_iblock) {
+ jbd_meta_csum_set(journal->jbd_fs,
+ (struct jbd_bhdr *)bhdr);
+ trans->data_csum = checksum;
+ rc = jbd_block_set(journal->jbd_fs, &desc_block);
+ }
+
+ return rc;
+}
+
+/**@brief Write the revoke blocks of a transaction.
+ *
+ * Every record of trans->revoke_root is appended to a revoke block as a
+ * big-endian block number (8 bytes with the 64BIT incompat feature,
+ * 4 bytes otherwise). A new revoke block is started whenever the current
+ * one has no room for another record.
+ * @param journal current journal session
+ * @param trans transaction
+ * @return standard error code*/
+static int
+jbd_journal_prepare_revoke(struct jbd_journal *journal,
+ struct jbd_trans *trans)
+{
+ /* i only counts the records written; it is not read anywhere. */
+ int rc = 0, i = 0;
+ struct ext4_block desc_block = EXT4_BLOCK_ZERO();
+ /* Bytes still free for records in the current revoke block. */
+ s32int tag_tbl_size = 0;
+ /* Journal block of the revoke block being filled; 0 means none is open. */
+ u32int desc_iblock = 0;
+ char *blocks_entry = nil;
+ struct jbd_revoke_rec *rec, *tmp;
+ struct jbd_revoke_header *header = nil;
+ s32int record_len = 4;
+ struct jbd_bhdr *bhdr = nil;
+
+ if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
+ JBD_FEATURE_INCOMPAT_64BIT))
+ record_len = 8;
+
+ RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
+ tmp) {
+again:
+ /* Open a new revoke block when none is being filled. */
+ if (!desc_iblock) {
+ desc_iblock = jbd_journal_alloc_block(journal, trans);
+ rc = jbd_block_get_noread(journal->jbd_fs, &desc_block,
+ desc_iblock);
+ if (rc != 0)
+ break;
+
+ bhdr = (struct jbd_bhdr *)desc_block.data;
+ jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
+ jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
+ jbd_set32(bhdr, sequence, trans->trans_id);
+
+ /* Records start right after the revoke header. */
+ header = (struct jbd_revoke_header *)bhdr;
+ blocks_entry = (char *)(header + 1);
+ tag_tbl_size = journal->block_size -
+ sizeof(struct jbd_revoke_header);
+
+ /* Keep room for the checksum tail at the end of the block. */
+ if (jbd_has_csum(&journal->jbd_fs->sb))
+ tag_tbl_size -= sizeof(struct jbd_block_tail);
+
+ if (!trans->start_iblock)
+ trans->start_iblock = desc_iblock;
+
+ ext4_bcache_set_dirty(desc_block.buf);
+ ext4_bcache_set_flag(desc_block.buf, BC_TMP);
+ }
+
+ /* No room for another record: close this block and retry the
+ * same record in a new one. */
+ if (tag_tbl_size < record_len) {
+ /* NOTE(review): with checksums enabled tag_tbl_size was
+ * already reduced by the tail size, so the count stored
+ * here also covers the tail bytes -- confirm that this is
+ * what the replay code expects. */
+ jbd_set32(header, count,
+ journal->block_size - tag_tbl_size);
+ jbd_meta_csum_set(journal->jbd_fs, bhdr);
+ bhdr = nil;
+ desc_iblock = 0;
+ header = nil;
+ rc = jbd_block_set(journal->jbd_fs, &desc_block);
+ if (rc != 0)
+ break;
+
+ goto again;
+ }
+ /* Store the revoked block number in big-endian byte order. */
+ if (record_len == 8) {
+ u64int *blocks =
+ (u64int *)blocks_entry;
+ *blocks = to_be64(rec->lba);
+ } else {
+ u32int *blocks =
+ (u32int *)blocks_entry;
+ *blocks = to_be32((u32int)rec->lba);
+ }
+ blocks_entry += record_len;
+ tag_tbl_size -= record_len;
+
+ i++;
+ }
+ /* Finish the revoke block that is still open, if any. */
+ if (rc == 0 && desc_iblock) {
+ if (header != nil)
+ jbd_set32(header, count,
+ journal->block_size - tag_tbl_size);
+
+ jbd_meta_csum_set(journal->jbd_fs, bhdr);
+ rc = jbd_block_set(journal->jbd_fs, &desc_block);
+ }
+
+ return rc;
+}
+
+/**@brief Release the block reference held by every buffer that is queued
+ * on a transaction.
+ * @param journal current journal session
+ * @param trans transaction*/
+void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
+{
+	struct ext4_fs *owner = journal->jbd_fs->inode_ref.fs;
+	struct jbd_buf *cur, *next;
+
+	TAILQ_FOREACH_SAFE(cur, &trans->buf_queue, buf_node, next) {
+		/* ext4_block_set() gets a copy, so the descriptor kept in
+		 * the queue entry itself is not written to. */
+		struct ext4_block copy = cur->block;
+
+		ext4_block_set(owner->bdev, &copy);
+	}
+}
+
+/**@brief Write-completion callback of a block that belongs to a
+ * checkpointed transaction.
+ *
+ * Detaches the jbd_buf from its transaction and block record and frees it.
+ * Once every data block of the transaction has been written, and the
+ * transaction was first in the checkpoint queue on entry, the start of the
+ * journal is moved past it and the journal superblock is written.
+ * @param bc block cache (unused)
+ * @param buf buffer that was written, may be nil
+ * @param res result of the write; a non-zero value is kept in trans->error
+ * @param arg the struct jbd_buf this callback was registered with*/
+static void jbd_trans_end_write(struct ext4_bcache *bc,
+ struct ext4_buf *buf,
+ int res,
+ void *arg)
+{
+ struct jbd_buf *jbd_buf = arg;
+ struct jbd_trans *trans = jbd_buf->trans;
+ struct jbd_block_rec *block_rec = jbd_buf->block_rec;
+ struct jbd_journal *journal = trans->journal;
+ /* Sampled up front, before any of the queues below are modified. */
+ bool first_in_queue =
+ trans == TAILQ_FIRST(&journal->cp_queue);
+ if (res != 0)
+ trans->error = res;
+
+ USED(bc);
+ /* Unlink the buffer from the transaction and from the per-block
+ * list of dirty buffers. */
+ TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
+ TAILQ_REMOVE(&block_rec->dirty_buf_queue,
+ jbd_buf,
+ dirty_buf_node);
+
+ jbd_trans_finish_callback(journal,
+ trans,
+ jbd_buf->block_rec,
+ false,
+ false);
+ if (block_rec->trans == trans && buf) {
+ /* Clear the end_write and end_write_arg fields. */
+ buf->end_write = nil;
+ buf->end_write_arg = nil;
+ }
+
+ ext4_free(jbd_buf);
+
+ trans->written_cnt++;
+ if (trans->written_cnt == trans->data_cnt) {
+ /* If it is the first transaction on checkpoint queue,
+ * we will shift the start of the journal to the next
+ * transaction, and remove subsequent written
+ * transactions from checkpoint queue until we find
+ * an unwritten one. */
+ if (first_in_queue) {
+ /* The new start is the first journal block after this
+ * transaction. */
+ journal->start = trans->start_iblock +
+ trans->alloc_blocks;
+ wrap(&journal->jbd_fs->sb, journal->start);
+ journal->trans_id = trans->trans_id + 1;
+ TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
+ jbd_journal_free_trans(journal, trans, false);
+
+ jbd_journal_purge_cp_trans(journal, false, false);
+ jbd_journal_write_sb(journal);
+ jbd_write_sb(journal->jbd_fs);
+ }
+ }
+}
+
+/**@brief Commit a transaction to the journal immediately.
+ *
+ * Writes the descriptor and data blocks, the revoke blocks and the commit
+ * block, then either queues the transaction on journal->cp_queue for
+ * checkpointing or frees it. A transaction with neither buffers nor revoke
+ * records is freed and reported as success. On error journal->last is
+ * restored to its value at entry and the transaction is freed.
+ * On every path trans is freed or queued before returning.
+ * @param journal current journal session
+ * @param trans transaction
+ * @return standard error code*/
+static int __jbd_journal_commit_trans(struct jbd_journal *journal,
+ struct jbd_trans *trans)
+{
+ int rc;
+ /* Saved so that a failed commit can be rolled back at Finish. */
+ u32int last = journal->last;
+ struct jbd_revoke_rec *rec, *tmp;
+
+ /* The transaction receives its id only now, at commit time. */
+ trans->trans_id = journal->alloc_trans_id;
+ rc = jbd_journal_prepare(journal, trans);
+ if (rc != 0)
+ goto Finish;
+
+ rc = jbd_journal_prepare_revoke(journal, trans);
+ if (rc != 0)
+ goto Finish;
+
+ if (TAILQ_EMPTY(&trans->buf_queue) &&
+ RB_EMPTY(&trans->revoke_root)) {
+ /* Since there are no entries in both buffer list
+ * and revoke entry list, we do not consider trans as
+ * complete transaction and just return 0.*/
+ jbd_journal_free_trans(journal, trans, false);
+ goto Finish;
+ }
+
+ rc = jbd_trans_write_commit_block(trans);
+ if (rc != 0)
+ goto Finish;
+
+ /* The id is consumed only once the commit block has been written. */
+ journal->alloc_trans_id++;
+
+ /* Complete the checkpoint of buffers which are revoked. */
+ RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
+ tmp) {
+ struct jbd_block_rec *block_rec =
+ jbd_trans_block_rec_lookup(journal, rec->lba);
+ struct jbd_buf *jbd_buf = nil;
+ if (block_rec)
+ jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
+ jbd_buf_dirty);
+ if (jbd_buf) {
+ struct ext4_buf *buf;
+ struct ext4_block block = EXT4_BLOCK_ZERO();
+ /*
+ * We do this to reset the ext4_buf::end_write and
+ * ext4_buf::end_write_arg fields so that the checkpoint
+ * callback won't be triggered again.
+ */
+ buf = ext4_bcache_find_get(journal->jbd_fs->bdev->bc,
+ &block,
+ jbd_buf->block_rec->lba);
+ /* Run the completion callback by hand with a success result;
+ * buf may be nil when the block is not in the cache. */
+ jbd_trans_end_write(journal->jbd_fs->bdev->bc, buf, 0, jbd_buf);
+ if (buf)
+ ext4_block_set(journal->jbd_fs->bdev, &block);
+ }
+ }
+
+ if (TAILQ_EMPTY(&journal->cp_queue)) {
+ /*
+ * This transaction is going to be the first object in the
+ * checkpoint queue.
+ * When the first transaction in checkpoint queue is completely
+ * written to disk, we shift the tail of the log to right.
+ */
+ if (trans->data_cnt) {
+ journal->start = trans->start_iblock;
+ wrap(&journal->jbd_fs->sb, journal->start);
+ journal->trans_id = trans->trans_id;
+ jbd_journal_write_sb(journal);
+ jbd_write_sb(journal->jbd_fs);
+ TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
+ trans_node);
+ jbd_journal_cp_trans(journal, trans);
+ } else {
+ /* No data blocks to checkpoint: move the journal start
+ * past this transaction right away and free it. */
+ journal->start = trans->start_iblock +
+ trans->alloc_blocks;
+ wrap(&journal->jbd_fs->sb, journal->start);
+ journal->trans_id = trans->trans_id + 1;
+ jbd_journal_write_sb(journal);
+ jbd_journal_free_trans(journal, trans, false);
+ }
+ } else {
+ /* No need to do anything to the JBD superblock. */
+ TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
+ trans_node);
+ if (trans->data_cnt)
+ jbd_journal_cp_trans(journal, trans);
+ }
+Finish:
+ /* Failure: roll journal->last back and free the transaction. */
+ if (rc != 0) {
+ journal->last = last;
+ jbd_journal_free_trans(journal, trans, true);
+ }
+ return rc;
+}
+
+/**@brief Allocate an empty transaction bound to a journal.
+ *
+ * The transaction id is left at zero here; it is assigned when the
+ * transaction is committed.
+ * @param journal current journal session
+ * @return the new transaction, nil when the allocation fails*/
+struct jbd_trans *
+jbd_journal_new_trans(struct jbd_journal *journal)
+{
+	struct jbd_trans *fresh = ext4_calloc(1, sizeof(struct jbd_trans));
+
+	if (fresh == nil)
+		return nil;
+
+	TAILQ_INIT(&fresh->buf_queue);
+	fresh->error = 0;
+	fresh->data_csum = EXT4_CRC32_INIT;
+	fresh->journal = journal;
+	return fresh;
+}
+
+/**@brief Public entry point for committing a transaction; forwards to
+ * __jbd_journal_commit_trans().
+ * @param journal current journal session
+ * @param trans transaction
+ * @return standard error code*/
+int jbd_journal_commit_trans(struct jbd_journal *journal,
+	struct jbd_trans *trans)
+{
+	int rc;
+
+	rc = __jbd_journal_commit_trans(journal, trans);
+	return rc;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_mbr.c
@@ -1,0 +1,165 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_mbr.h"
+
+#define MBR_SIGNATURE 0xAA55
+
+#pragma pack on
+
+/* One 16-byte slot of the MBR partition table as laid out on disk
+ * (the structure is declared under "#pragma pack on"). */
+struct ext4_part_entry {
+ u8int status;
+ u8int chs1[3]; /* CHS address; ext4_mbr_write fills it from the start cylinder */
+ u8int type; /* partition type; ext4_mbr_scan only accepts 0x83 */
+ u8int chs2[3]; /* CHS address; ext4_mbr_write fills it from the end cylinder */
+ u32int first_lba; /* first sector; ext4_mbr_scan scales it by ph_bsize */
+ u32int sectors; /* length in sectors; 0 is treated as an empty entry */
+};
+
+/* On-disk layout of a 512-byte MBR sector (declared under
+ * "#pragma pack on", so there is no padding between members).
+ *
+ * The 32-bit disk signature lives at byte offset 440 and is followed by
+ * two reserved bytes; the partition table starts at offset 446 and the
+ * boot signature occupies the last two bytes. */
+struct ext4_mbr {
+	u8int bootstrap[440];			/* boot code, offset 0 */
+	u32int disk_id;				/* disk signature, offset 440 */
+	u16int reserved;			/* offset 444 */
+	struct ext4_part_entry part_entry[4];	/* offset 446 */
+	u16int signature;			/* MBR_SIGNATURE, offset 510 */
+};
+
+#pragma pack off
+
+/* Read the MBR from sector 0 of parent and describe its Linux partitions.
+ *
+ * bdevs is zeroed first. For every partition table entry that is not empty
+ * and has type 0x83, bdevs->partitions[i] receives the parent's block
+ * device interface plus the partition's byte offset and byte size; the
+ * other slots stay zeroed. The multi-byte on-disk fields are little-endian
+ * and are converted before use.
+ *
+ * Returns 0 on success, -1 with errstr set to Enotfound when the boot
+ * signature does not match, or the error of ext4_block_init() /
+ * ext4_block_readbytes(). */
+int ext4_mbr_scan(struct ext4_blockdev *parent, struct ext4_mbr_bdevs *bdevs)
+{
+	int r;
+	usize i;
+
+	ext4_dbg(DEBUG_MBR, DBG_INFO "ext4_mbr_scan\n");
+	memset(bdevs, 0, sizeof(struct ext4_mbr_bdevs));
+	r = ext4_block_init(parent);
+	if (r != 0)
+		return r;
+
+	r = ext4_block_readbytes(parent, 0, parent->bdif->ph_bbuf, 512);
+	if (r != 0) {
+		goto blockdev_fini;
+	}
+
+	const struct ext4_mbr *mbr = (void *)parent->bdif->ph_bbuf;
+
+	if (to_le16(mbr->signature) != MBR_SIGNATURE) {
+		ext4_dbg(DEBUG_MBR, DBG_ERROR "ext4_mbr_scan: unknown "
+			 "signature: 0x%x\n", to_le16(mbr->signature));
+		werrstr(Enotfound);
+		r = -1;
+		goto blockdev_fini;
+	}
+
+	/*Show bootstrap code*/
+	ext4_dbg(DEBUG_MBR, "mbr_part: bootstrap:");
+	for (i = 0; i < sizeof(mbr->bootstrap); ++i) {
+		if (!(i & 0xF))
+			ext4_dbg(DEBUG_MBR | DEBUG_NOPREFIX, "\n");
+		ext4_dbg(DEBUG_MBR | DEBUG_NOPREFIX, "%02x, ", mbr->bootstrap[i]);
+	}
+
+	ext4_dbg(DEBUG_MBR | DEBUG_NOPREFIX, "\n\n");
+	for (i = 0; i < 4; ++i) {
+		const struct ext4_part_entry *pe = &mbr->part_entry[i];
+		/* Convert once so that big-endian hosts see the same
+		 * values as little-endian ones. */
+		u32int first_lba = to_le32(pe->first_lba);
+		u32int sectors = to_le32(pe->sectors);
+
+		ext4_dbg(DEBUG_MBR, "mbr_part: %d\n", (int)i);
+		ext4_dbg(DEBUG_MBR, "\tstatus: 0x%x\n", pe->status);
+		ext4_dbg(DEBUG_MBR, "\ttype 0x%x:\n", pe->type);
+		ext4_dbg(DEBUG_MBR, "\tfirst_lba: 0x%ux\n", first_lba);
+		ext4_dbg(DEBUG_MBR, "\tsectors: 0x%ux\n", sectors);
+
+		if (!sectors)
+			continue; /*Empty entry*/
+
+		if (pe->type != 0x83)
+			continue; /*Unsupported entry. 0x83 - linux native*/
+
+		bdevs->partitions[i].bdif = parent->bdif;
+		bdevs->partitions[i].part_offset =
+			(u64int)first_lba * parent->bdif->ph_bsize;
+		bdevs->partitions[i].part_size =
+			(u64int)sectors * parent->bdif->ph_bsize;
+	}
+
+	blockdev_fini:
+	ext4_block_fini(parent);
+	return r;
+}
+
+/* Write a fresh MBR with up to four Linux (type 0x83) partitions to parent.
+ *
+ * parts->division[i] is the share of the disk, in percent, given to
+ * partition i; the shares must not add up to more than 100 and a share
+ * that rounds down to zero cylinders leaves its slot empty. Partitions are
+ * laid out back to back on cylinder boundaries. The first partition that
+ * is actually created starts 63 sectors in, so that it does not cover the
+ * MBR itself.
+ *
+ * All positions are computed in sectors (the unit the partition table
+ * stores) and multi-byte fields are written little-endian. Values that do
+ * not fit the 32-bit on-disk fields are truncated.
+ *
+ * Returns 0 on success, -1 with errstr Einval for a bad division, or the
+ * error of ext4_block_init() / ext4_block_writebytes(). */
+int ext4_mbr_write(struct ext4_blockdev *parent, struct ext4_mbr_parts *parts, u32int disk_id)
+{
+	int r;
+	u64int disk_size;
+	u32int division_sum = parts->division[0] + parts->division[1] +
+				parts->division[2] + parts->division[3];
+
+	if (division_sum > 100) {
+		werrstr(Einval);
+		return -1;
+	}
+
+	ext4_dbg(DEBUG_MBR, DBG_INFO "ext4_mbr_write\n");
+	r = ext4_block_init(parent);
+	if (r != 0)
+		return r;
+
+	disk_size = parent->part_size;
+
+	/*Calculate CHS*/
+	u32int k = 16;
+	while ((k < 256) && ((disk_size / parent->bdif->ph_bsize / k / 63) > 1024))
+		k *= 2;
+
+	if (k == 256)
+		--k;
+
+	/* k heads, 63 sectors per track */
+	const u32int cyl_sectors = 63 * k;
+	const u32int cyl_count = disk_size / parent->bdif->ph_bsize / cyl_sectors;
+
+	struct ext4_mbr *mbr = (void *)parent->bdif->ph_bbuf;
+	memset(mbr, 0, sizeof(struct ext4_mbr));
+
+	mbr->disk_id = to_le32(disk_id);
+
+	u32int cyl_it = 0;
+	for (int i = 0; i < 4; ++i) {
+		u32int cyl_part = (u64int)cyl_count * parts->division[i] / 100;
+		if (!cyl_part)
+			continue;
+
+		/* Whichever slot comes first on disk has to skip the track
+		 * that holds the MBR, not just slot 0. */
+		int first = (cyl_it == 0);
+		u64int part_start = (u64int)cyl_it * cyl_sectors;
+		u64int part_sectors = (u64int)cyl_part * cyl_sectors;
+
+		if (first) {
+			part_start += 63;
+			part_sectors -= 63;
+		}
+
+		u32int cyl_end = cyl_part + cyl_it - 1;
+
+		/* CHS bytes: head, sector plus the two high cylinder bits,
+		 * low eight cylinder bits. */
+		mbr->part_entry[i].status = 0;
+		mbr->part_entry[i].chs1[0] = first ? 1 : 0;
+		mbr->part_entry[i].chs1[1] = ((cyl_it >> 2) & 0xC0) + 1;
+		mbr->part_entry[i].chs1[2] = cyl_it & 0xFF;
+		mbr->part_entry[i].type = 0x83;
+		mbr->part_entry[i].chs2[0] = k - 1;
+		mbr->part_entry[i].chs2[1] = ((cyl_end >> 2) & 0xC0) + 63;
+		mbr->part_entry[i].chs2[2] = cyl_end & 0xFF;
+
+		mbr->part_entry[i].first_lba = to_le32((u32int)part_start);
+		mbr->part_entry[i].sectors = to_le32((u32int)part_sectors);
+
+		cyl_it += cyl_part;
+	}
+
+	mbr->signature = to_le16(MBR_SIGNATURE);
+	r = ext4_block_writebytes(parent, 0, parent->bdif->ph_bbuf, 512);
+
+	ext4_block_fini(parent);
+	return r;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_mkfs.c
@@ -1,0 +1,805 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_super.h"
+#include "ext4_block_group.h"
+#include "ext4_dir.h"
+#include "ext4_dir_idx.h"
+#include "ext4_fs.h"
+#include "ext4_inode.h"
+#include "ext4_ialloc.h"
+#include "ext4_mkfs.h"
+
+/* Integer base-2 logarithm, rounded down. Returns -1 for j <= 0. */
+static inline int log_2(int j)
+{
+	int bits = 0;
+
+	while (j > 0) {
+		j >>= 1;
+		bits++;
+	}
+
+	return bits - 1;
+}
+
+/* Translate an on-disk superblock into mkfs parameters.
+ * Returns 0 on success, -1 with errstr set when the magic is wrong. */
+static int sb2info(struct ext4_sblock *sb, struct ext4_mkfs_info *info)
+{
+	if (to_le16(sb->magic) != EXT4_SUPERBLOCK_MAGIC) {
+		werrstr("invalid superblock magic");
+		return -1;
+	}
+
+	/* geometry; len is in bytes and depends on block_size */
+	info->block_size = 1024 << to_le32(sb->log_block_size);
+	info->len = (u64int)info->block_size * ext4_sb_get_blocks_cnt(sb);
+	info->blocks_per_group = to_le32(sb->blocks_per_group);
+	info->bg_desc_reserve_blocks = to_le16(sb->s_reserved_gdt_blocks);
+	info->dsc_size = to_le16(sb->desc_size);
+
+	/* inodes */
+	info->inodes = to_le32(sb->inodes_count);
+	info->inodes_per_group = to_le32(sb->inodes_per_group);
+	info->inode_size = to_le16(sb->inode_size);
+
+	/* feature sets */
+	info->feat_compat = to_le32(sb->features_compatible);
+	info->feat_incompat = to_le32(sb->features_incompatible);
+	info->feat_ro_compat = to_le32(sb->features_read_only);
+
+	/* identity */
+	memcpy(info->uuid, sb->uuid, UUID_SIZE);
+	/* NOTE(review): strncpy() does not terminate the label when the
+	 * volume name fills the destination -- confirm the sizes. */
+	strncpy(info->label,sb->volume_name,sizeof(info->label));
+
+	return 0;
+}
+
+/* Default group size: the number of bits in one block. */
+static u32int compute_blocks_per_group(struct ext4_mkfs_info *info)
+{
+	u32int bits_per_block = info->block_size * 8;
+
+	return bits_per_block;
+}
+
+/* Default inode count: one inode for every four blocks, where the block
+ * count is len / block_size rounded up. */
+static u32int compute_inodes(struct ext4_mkfs_info *info)
+{
+	u32int nblocks = (u32int)EXT4_DIV_ROUND_UP(info->len, info->block_size);
+
+	return nblocks / 4;
+}
+
+/* Spread info->inodes over the block groups and round the per-group figure
+ * up to a whole number of inode-table blocks.
+ *
+ * Side effect: info->inodes is replaced by the rounded total, i.e. the
+ * returned value multiplied by the number of groups. */
+static u32int compute_inodes_per_group(struct ext4_mkfs_info *info)
+{
+	u32int nblocks = (u32int)EXT4_DIV_ROUND_UP(info->len, info->block_size);
+	u32int ngroups = EXT4_DIV_ROUND_UP(nblocks, info->blocks_per_group);
+	u32int per_group;
+
+	per_group = EXT4_DIV_ROUND_UP(info->inodes, ngroups);
+	per_group = EXT4_ALIGN(per_group, (info->block_size / info->inode_size));
+
+	info->inodes = per_group * ngroups;
+
+	return per_group;
+}
+
+
+/* Default journal length: 1/64 of the block count, clamped to the range
+ * 1024..32768 blocks. */
+static u32int compute_journal_blocks(struct ext4_mkfs_info *info)
+{
+	u32int want = (u32int)EXT4_DIV_ROUND_UP(info->len,
+						info->block_size) / 64;
+
+	if (want < 1024)
+		return 1024;
+	if (want > 32768)
+		return 32768;
+	return want;
+}
+
+/* Tell whether block group bgid holds a superblock copy: every group does
+ * unless the sparse-super feature is set, in which case ext4_sb_sparse()
+ * decides. */
+static bool has_superblock(struct ext4_mkfs_info *info, u32int bgid)
+{
+	if (info->feat_ro_compat & EXT4_FRO_COM_SPARSE_SUPER)
+		return ext4_sb_sparse(bgid);
+
+	return true;
+}
+
+/* Derive the layout figures mkfs needs from info and allocate the
+ * in-memory superblock plus one block used to build group descriptors.
+ *
+ * A trailing block group that is too small to hold its own metadata is
+ * dropped and len_blocks is shortened accordingly.
+ *
+ * On success aux_info->sb and aux_info->bg_desc_blk belong to the caller
+ * and are released with release_fs_aux_info(). On failure both are nil.
+ * Returns 0 on success, -1 with errstr set to Enomem otherwise. */
+int create_fs_aux_info(struct fs_aux_info *aux_info,
+	struct ext4_mkfs_info *info)
+{
+	aux_info->first_data_block = (info->block_size > 1024) ? 0 : 1;
+	aux_info->len_blocks = info->len / info->block_size;
+	aux_info->inode_table_blocks = EXT4_DIV_ROUND_UP(info->inodes_per_group *
+			info->inode_size, info->block_size);
+	aux_info->groups = (u32int)EXT4_DIV_ROUND_UP(aux_info->len_blocks -
+			aux_info->first_data_block, info->blocks_per_group);
+
+	/* Blocks reachable through one, two and three levels of
+	 * indirection; each level multiplies by the number of 32-bit block
+	 * numbers that fit in a block. */
+	aux_info->blocks_per_ind = info->block_size / sizeof(u32int);
+	aux_info->blocks_per_dind =
+			aux_info->blocks_per_ind * aux_info->blocks_per_ind;
+	aux_info->blocks_per_tind =
+			aux_info->blocks_per_dind * aux_info->blocks_per_ind;
+
+	aux_info->bg_desc_blocks =
+		EXT4_DIV_ROUND_UP(aux_info->groups * info->dsc_size,
+			info->block_size);
+
+	aux_info->default_i_flags = EXT4_INODE_FLAG_NOATIME;
+
+	/* Metadata of the last group: two bitmaps, the inode table and,
+	 * where present, the superblock copy with its descriptor blocks. */
+	u32int last_group_size = aux_info->len_blocks % info->blocks_per_group;
+	u32int last_header_size = 2 + aux_info->inode_table_blocks;
+	if (has_superblock(info, aux_info->groups - 1))
+		last_header_size += 1 + aux_info->bg_desc_blocks +
+			info->bg_desc_reserve_blocks;
+
+	if (last_group_size > 0 && last_group_size < last_header_size) {
+		aux_info->groups--;
+		aux_info->len_blocks -= last_group_size;
+	}
+
+	aux_info->bg_desc_blk = nil;
+	aux_info->sb = ext4_calloc(1, EXT4_SUPERBLOCK_SIZE);
+	if (!aux_info->sb) {
+		werrstr(Enomem);
+		return -1;
+	}
+
+	aux_info->bg_desc_blk = ext4_calloc(1, info->block_size);
+	if (!aux_info->bg_desc_blk) {
+		/* Do not hand a half-built result back to the caller. */
+		ext4_free(aux_info->sb);
+		aux_info->sb = nil;
+		werrstr(Enomem);
+		return -1;
+	}
+
+	aux_info->xattrs = nil;
+
+
+	ext4_dbg(DEBUG_MKFS, DBG_INFO "create_fs_aux_info\n");
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "first_data_block: %ud\n",
+			aux_info->first_data_block);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "len_blocks: %llud\n",
+			aux_info->len_blocks);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "inode_table_blocks: %ud\n",
+			aux_info->inode_table_blocks);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "groups: %ud\n",
+			aux_info->groups);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "bg_desc_blocks: %ud\n",
+			aux_info->bg_desc_blocks);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "default_i_flags: %ud\n",
+			aux_info->default_i_flags);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "blocks_per_ind: %ud\n",
+			aux_info->blocks_per_ind);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "blocks_per_dind: %ud\n",
+			aux_info->blocks_per_dind);
+	ext4_dbg(DEBUG_MKFS, DBG_NONE "blocks_per_tind: %ud\n",
+			aux_info->blocks_per_tind);
+
+	return 0;
+}
+
+/* Free the buffers allocated by create_fs_aux_info().
+ * The pointers are reset to nil, so releasing the same aux_info twice is
+ * harmless. */
+void release_fs_aux_info(struct fs_aux_info *aux_info)
+{
+	if (aux_info->sb) {
+		ext4_free(aux_info->sb);
+		aux_info->sb = nil;
+	}
+	if (aux_info->bg_desc_blk) {
+		ext4_free(aux_info->bg_desc_blk);
+		aux_info->bg_desc_blk = nil;
+	}
+}
+
+
+/* Populate the in-memory superblock aux_info->sb from the filesystem
+ * parameters. Nothing is written to disk here. */
+static void fill_sb(struct fs_aux_info *aux_info, struct ext4_mkfs_info *info)
+{
+	struct ext4_sblock *sb = aux_info->sb;
+	u32int total_inodes = info->inodes_per_group * aux_info->groups;
+
+	/* Counters. The free block count set here is the whole device;
+	 * write_bgroups() replaces it later. */
+	sb->inodes_count = to_le32(total_inodes);
+	sb->free_inodes_count = to_le32(total_inodes);
+	ext4_sb_set_blocks_cnt(sb, aux_info->len_blocks);
+	ext4_sb_set_free_blocks_cnt(sb, aux_info->len_blocks);
+	sb->reserved_blocks_count_lo = to_le32(0);
+	sb->reserved_blocks_count_hi = to_le32(0);
+
+	/* Geometry */
+	sb->first_data_block = to_le32(aux_info->first_data_block);
+	sb->log_block_size = to_le32(log_2(info->block_size / 1024));
+	sb->log_cluster_size = to_le32(log_2(info->block_size / 1024));
+	sb->blocks_per_group = to_le32(info->blocks_per_group);
+	sb->frags_per_group = to_le32(info->blocks_per_group);
+	sb->inodes_per_group = to_le32(info->inodes_per_group);
+	sb->first_inode = to_le32(EXT4_GOOD_OLD_FIRST_INO);
+	sb->inode_size = to_le16(info->inode_size);
+	sb->block_group_index = to_le16(0);
+	sb->desc_size = to_le16(info->dsc_size);
+	sb->first_meta_bg = to_le32(0);
+	sb->s_reserved_gdt_blocks = to_le16(info->bg_desc_reserve_blocks);
+	sb->s_prealloc_blocks = 0;
+	sb->s_prealloc_dir_blocks = 0;
+
+	/* State, revision and bookkeeping */
+	sb->magic = to_le16(EXT4_SUPERBLOCK_MAGIC);
+	sb->state = to_le16(EXT4_SUPERBLOCK_STATE_VALID_FS);
+	sb->errors = to_le16(EXT4_SUPERBLOCK_ERRORS_RO);
+	sb->rev_level = to_le32(1);
+	sb->minor_rev_level = to_le16(0);
+	sb->creator_os = to_le32(EXT4_SUPERBLOCK_OS_LINUX);
+	sb->mount_time = to_le32(0);
+	sb->write_time = to_le32(0);
+	sb->mkfs_time = to_le32(0);
+	sb->mount_count = to_le16(0);
+	sb->max_mount_count = to_le16(0xFFFF);
+	sb->last_check_time = to_le32(0);
+	sb->check_interval = to_le32(0);
+	sb->def_resuid = to_le16(0);
+	sb->def_resgid = to_le16(0);
+	sb->default_mount_opts = to_le32(0);
+	sb->algorithm_usage_bitmap = to_le32(0);
+	sb->last_orphan = to_le32(0);
+	sb->flags = to_le32(EXT4_SUPERBLOCK_FLAGS_SIGNED_HASH);
+
+	/* Features */
+	sb->features_compatible = to_le32(info->feat_compat);
+	sb->features_incompatible = to_le32(info->feat_incompat);
+	sb->features_read_only = to_le32(info->feat_ro_compat);
+
+	/* Identity: the name fields are cleared before the label is
+	 * copied in. */
+	memcpy(sb->uuid, info->uuid, UUID_SIZE);
+	memset(sb->volume_name, 0, sizeof(sb->volume_name));
+	strncpy(sb->volume_name, info->label, sizeof(sb->volume_name));
+	memset(sb->last_mounted, 0, sizeof(sb->last_mounted));
+
+	/* Journal */
+	if (info->feat_compat & EXT4_FCOM_HAS_JOURNAL)
+		sb->journal_inode_number = to_le32(EXT4_JOURNAL_INO);
+	sb->journal_backup_type = 1;
+	sb->journal_dev = to_le32(0);
+
+	/* Directory hashing and checksums */
+	sb->hash_seed[0] = to_le32(0x11111111);
+	sb->hash_seed[1] = to_le32(0x22222222);
+	sb->hash_seed[2] = to_le32(0x33333333);
+	sb->hash_seed[3] = to_le32(0x44444444);
+	sb->default_hash_version = EXT2_HTREE_HALF_MD4;
+	sb->checksum_type = 1;
+
+	/* Extra inode space beyond the classic inode size */
+	sb->min_extra_isize = to_le32(sizeof(struct ext4_inode) -
+				      EXT4_GOOD_OLD_INODE_SIZE);
+	sb->want_extra_isize = to_le32(sizeof(struct ext4_inode) -
+				       EXT4_GOOD_OLD_INODE_SIZE);
+}
+
+
+/* Write the descriptor block held in aux_info->bg_desc_blk into every
+ * block group.
+ *
+ * blk is the index of the descriptor block within the descriptor table.
+ * In each group the copy goes to the group's first block plus blk, moved
+ * one block further when the group starts with a superblock copy.
+ * The group start is computed in 64 bits so that it cannot wrap.
+ *
+ * Returns 0 on success or the first block-layer error. */
+static int write_bgroup_block(struct ext4_blockdev *bd,
+	struct fs_aux_info *aux_info,
+	struct ext4_mkfs_info *info,
+	u32int blk)
+{
+	int r = 0;
+	u32int j;
+	struct ext4_block b;
+
+	u32int block_size = ext4_sb_get_block_size(aux_info->sb);
+
+	for (j = 0; j < aux_info->groups; j++) {
+		u64int bg_start_block = aux_info->first_data_block +
+			(u64int)j * info->blocks_per_group;
+
+		/* Skip the superblock copy at the head of the group. */
+		if (has_superblock(info, j))
+			bg_start_block++;
+
+		u64int dsc_blk = bg_start_block + blk;
+
+		r = ext4_block_get_noread(bd, &b, dsc_blk);
+		if (r != 0)
+			return r;
+
+		memcpy(b.data, aux_info->bg_desc_blk, block_size);
+
+		ext4_bcache_set_dirty(b.buf);
+		r = ext4_block_set(bd, &b);
+		if (r != 0)
+			return r;
+	}
+
+	return r;
+}
+
+/* Build the descriptor of every block group, zero the blocks chosen for
+ * its block and inode bitmaps, and write the descriptor table out.
+ *
+ * Descriptors are collected in aux_info->bg_desc_blk; each time the block
+ * is full it is handed to write_bgroup_block(). The free block count of
+ * the superblock is set to the sum of the per-group free counts at the end.
+ * Returns 0 on success or the first block-layer error. */
+static int write_bgroups(struct ext4_blockdev *bd, struct fs_aux_info *aux_info,
+ struct ext4_mkfs_info *info)
+{
+ int r;
+
+ struct ext4_block b;
+ struct ext4_bgroup *bg_desc;
+
+ u32int i;
+ u32int bg_free_blk;
+ u64int sb_free_blk = 0;
+ u32int block_size = ext4_sb_get_block_size(aux_info->sb);
+ u32int dsc_size = ext4_sb_get_desc_size(aux_info->sb);
+ u32int dsc_per_block = block_size / dsc_size;
+ /* Slot of the next descriptor inside bg_desc_blk. */
+ u32int k = 0;
+
+ for (i = 0; i < aux_info->groups; i++) {
+ /* NOTE(review): first_data_block is added twice here, while
+ * write_bgroup_block() adds it once -- confirm this is intended. */
+ u64int bg_start_block = aux_info->first_data_block +
+ aux_info->first_data_block + i * info->blocks_per_group;
+ u32int blk_off = 0;
+
+ bg_desc = (void *)(aux_info->bg_desc_blk + k * dsc_size);
+ bg_free_blk = info->blocks_per_group -
+ aux_info->inode_table_blocks;
+
+ /* Two blocks for the block bitmap and the inode bitmap. */
+ bg_free_blk -= 2;
+ blk_off += aux_info->bg_desc_blocks;
+
+ if (i == (aux_info->groups - 1))
+ bg_free_blk -= aux_info->first_data_block;
+
+ /* Groups with a superblock copy also lose the superblock, the
+ * descriptor blocks and the reserved descriptor blocks. */
+ if (has_superblock(info, i)) {
+ bg_start_block++;
+ blk_off += info->bg_desc_reserve_blocks;
+ bg_free_blk -= info->bg_desc_reserve_blocks + 1;
+ bg_free_blk -= aux_info->bg_desc_blocks;
+ }
+
+ /* Block bitmap, inode bitmap and inode table follow each other. */
+ ext4_bg_set_block_bitmap(bg_desc, aux_info->sb,
+ bg_start_block + blk_off + 1);
+
+ ext4_bg_set_inode_bitmap(bg_desc, aux_info->sb,
+ bg_start_block + blk_off + 2);
+
+ ext4_bg_set_inode_table_first_block(bg_desc,
+ aux_info->sb,
+ bg_start_block + blk_off + 3);
+
+ ext4_bg_set_free_blocks_count(bg_desc, aux_info->sb,
+ bg_free_blk);
+
+ /* to_le32() is used here to read the little-endian field back. */
+ ext4_bg_set_free_inodes_count(bg_desc,
+ aux_info->sb, to_le32(aux_info->sb->inodes_per_group));
+
+ ext4_bg_set_used_dirs_count(bg_desc, aux_info->sb, 0);
+
+ ext4_bg_set_flag(bg_desc,
+ EXT4_BLOCK_GROUP_BLOCK_UNINIT |
+ EXT4_BLOCK_GROUP_INODE_UNINIT);
+
+ sb_free_blk += bg_free_blk;
+
+ /* Zero the block bitmap block ... */
+ r = ext4_block_get_noread(bd, &b, bg_start_block + blk_off + 1);
+ if (r != 0)
+ return r;
+ memset(b.data, 0, block_size);
+ ext4_bcache_set_dirty(b.buf);
+ r = ext4_block_set(bd, &b);
+ if (r != 0)
+ return r;
+ /* ... and the inode bitmap block. */
+ r = ext4_block_get_noread(bd, &b, bg_start_block + blk_off + 2);
+ if (r != 0)
+ return r;
+ memset(b.data, 0, block_size);
+ ext4_bcache_set_dirty(b.buf);
+ r = ext4_block_set(bd, &b);
+ if (r != 0)
+ return r;
+
+ /* Flush the descriptor block once its last slot has been filled. */
+ if (++k != dsc_per_block)
+ continue;
+
+ k = 0;
+ r = write_bgroup_block(bd, aux_info, info, i / dsc_per_block);
+ if (r != 0)
+ return r;
+
+ }
+
+ /* Write the descriptor block that is still being filled.
+ * NOTE(review): this call is unconditional, so when the group count is
+ * a multiple of dsc_per_block the buffer is written once more, at the
+ * next descriptor block index -- confirm this is intended. */
+ r = write_bgroup_block(bd, aux_info, info, i / dsc_per_block);
+ if (r != 0)
+ return r;
+
+ ext4_sb_set_free_blocks_cnt(aux_info->sb, sb_free_blk);
+ return r;
+}
+
+/* Write the superblock aux_info->sb to the device: first a backup copy at
+ * the start of every block group that carries one, then the primary copy
+ * at EXT4_SUPERBLOCK_OFFSET.
+ *
+ * block_group_index is set to the group number in each backup and is back
+ * at 0 when the function returns successfully. Byte offsets are computed
+ * in 64 bits so that backups beyond 4 GiB land where they belong.
+ *
+ * Returns 0 on success or the first write error. */
+int write_sblocks(struct ext4_blockdev *bd, struct fs_aux_info *aux_info,
+	struct ext4_mkfs_info *info)
+{
+	u64int offset;
+	u32int i;
+	int r;
+
+	/* write out the backup superblocks */
+	for (i = 1; i < aux_info->groups; i++) {
+		if (has_superblock(info, i)) {
+			offset = (u64int)info->block_size *
+				(aux_info->first_data_block +
+				 (u64int)i * info->blocks_per_group);
+
+			aux_info->sb->block_group_index = to_le16(i);
+			r = ext4_block_writebytes(bd, offset, aux_info->sb,
+						  EXT4_SUPERBLOCK_SIZE);
+			if (r != 0)
+				return r;
+		}
+	}
+
+	/* write out the primary superblock */
+	aux_info->sb->block_group_index = to_le16(0);
+	return ext4_block_writebytes(bd, EXT4_SUPERBLOCK_OFFSET, aux_info->sb,
+			EXT4_SUPERBLOCK_SIZE);
+}
+
+
+/* Read the superblock of the filesystem on bd and translate it into
+ * mkfs parameters.
+ *
+ * The block device is initialised for the duration of the call and shut
+ * down again before returning.
+ * Returns 0 on success; -1 with errstr set when memory runs out or the
+ * superblock magic is wrong; otherwise the error of ext4_block_init() or
+ * ext4_sb_read(). */
+int ext4_mkfs_read_info(struct ext4_blockdev *bd, struct ext4_mkfs_info *info)
+{
+	int r;
+	struct ext4_sblock *sb;
+	r = ext4_block_init(bd);
+	if (r != 0)
+		return r;
+
+	sb = ext4_malloc(EXT4_SUPERBLOCK_SIZE);
+	if (!sb) {
+		/* r is still 0 from ext4_block_init(); report the failure
+		 * instead of returning success with info untouched. */
+		werrstr(Enomem);
+		r = -1;
+		goto Finish;
+	}
+
+	r = ext4_sb_read(bd, sb);
+	if (r != 0)
+		goto Finish;
+
+	r = sb2info(sb, info);
+
+Finish:
+	if (sb)
+		ext4_free(sb);
+	ext4_block_fini(bd);
+	return r;
+}
+
+/* First mkfs pass: compute the layout, build the superblock in memory and
+ * write the group descriptors, the bitmaps and all superblock copies.
+ * Returns 0 on success or the first error met. */
+static int mkfs_init(struct ext4_blockdev *bd, struct ext4_mkfs_info *info)
+{
+	struct fs_aux_info aux;
+	int r;
+
+	memset(&aux, 0, sizeof(struct fs_aux_info));
+
+	r = create_fs_aux_info(&aux, info);
+	if (r == 0) {
+		fill_sb(&aux, info);
+		r = write_bgroups(bd, &aux, info);
+		if (r == 0)
+			r = write_sblocks(bd, &aux, info);
+	}
+
+	/* Released on success and on failure alike. */
+	release_fs_aux_info(&aux);
+	return r;
+}
+
+/* Take and release a reference to every block group in turn, stopping at
+ * the first error.
+ * Returns 0 on success or that error. */
+static int init_bgs(struct ext4_fs *fs)
+{
+	struct ext4_block_group_ref bg;
+	u32int total = ext4_block_group_cnt(&fs->sb);
+	u32int n;
+	int r = 0;
+
+	for (n = 0; r == 0 && n < total; ++n) {
+		r = ext4_fs_get_block_group_ref(fs, n, &bg);
+		if (r == 0)
+			r = ext4_fs_put_block_group_ref(&bg);
+	}
+	return r;
+}
+
+/* Allocate eleven inodes in a row (loop counter 1..11). The iterations
+ * whose counter equals EXT4_ROOT_INO or EXT4_GOOD_OLD_FIRST_INO are
+ * allocated as directories, the rest as regular files. Every mode is
+ * cleared, and for EXT4_ROOT_INO and EXT4_JOURNAL_INO the block map is
+ * initialised as well.
+ * Returns 0 on success or the first allocation error. */
+static int alloc_inodes(struct ext4_fs *fs)
+{
+	struct ext4_inode_ref ref;
+	int ino;
+	int r = 0;
+
+	for (ino = 1; ino < 12; ++ino) {
+		int kind;
+
+		if (ino == EXT4_ROOT_INO || ino == EXT4_GOOD_OLD_FIRST_INO)
+			kind = EXT4_DE_DIR;
+		else
+			kind = EXT4_DE_REG_FILE;
+
+		r = ext4_fs_alloc_inode(fs, &ref, kind);
+		if (r != 0)
+			return r;
+
+		ext4_inode_set_mode(&fs->sb, ref.inode, 0);
+
+		if (ino == EXT4_ROOT_INO || ino == EXT4_JOURNAL_INO)
+			ext4_fs_inode_blocks_init(fs, &ref);
+
+		ext4_fs_put_inode_ref(&ref);
+	}
+
+	return r;
+}
+
+/* Create the root directory and its "lost+found" subdirectory.
+ *
+ * Both inodes become directories with mode 0777. They are set up as
+ * indexed directories when the superblock has the dir-index feature, and
+ * receive plain "." and ".." entries otherwise. "lost+found" is then
+ * linked into the root, and the link counts are set to 3 (root) and
+ * 2 (lost+found).
+ *
+ * Both inode references are released on every path, including errors.
+ * Returns 0 on success or the first error met. */
+static int create_dirs(struct ext4_fs *fs)
+{
+	int r;
+	struct ext4_inode_ref root;
+	struct ext4_inode_ref child;
+
+	r = ext4_fs_get_inode_ref(fs, EXT4_ROOT_INO, &root);
+	if (r != 0)
+		return r;
+
+	r = ext4_fs_get_inode_ref(fs, EXT4_GOOD_OLD_FIRST_INO, &child);
+	if (r != 0)
+		goto Putroot;
+
+	ext4_inode_set_mode(&fs->sb, child.inode,
+			EXT4_INODE_MODE_DIRECTORY | 0777);
+
+	ext4_inode_set_mode(&fs->sb, root.inode,
+			EXT4_INODE_MODE_DIRECTORY | 0777);
+
+	/* Initialize directory index if supported */
+	if (ext4_sb_feature_com(&fs->sb, EXT4_FCOM_DIR_INDEX)) {
+		r = ext4_dir_dx_init(&root, &root);
+		if (r != 0)
+			goto Putchild;
+
+		r = ext4_dir_dx_init(&child, &root);
+		if (r != 0)
+			goto Putchild;
+
+		ext4_inode_set_flag(root.inode, EXT4_INODE_FLAG_INDEX);
+		ext4_inode_set_flag(child.inode, EXT4_INODE_FLAG_INDEX);
+	} else {
+		r = ext4_dir_add_entry(&root, ".", strlen("."), &root);
+		if (r != 0)
+			goto Putchild;
+
+		r = ext4_dir_add_entry(&root, "..", strlen(".."), &root);
+		if (r != 0)
+			goto Putchild;
+
+		r = ext4_dir_add_entry(&child, ".", strlen("."), &child);
+		if (r != 0)
+			goto Putchild;
+
+		r = ext4_dir_add_entry(&child, "..", strlen(".."), &root);
+		if (r != 0)
+			goto Putchild;
+	}
+
+	r = ext4_dir_add_entry(&root, "lost+found", strlen("lost+found"), &child);
+	if (r != 0)
+		goto Putchild;
+
+	ext4_inode_set_links_cnt(root.inode, 3);
+	ext4_inode_set_links_cnt(child.inode, 2);
+
+	/* Make sure the mode and link count changes are written back. */
+	child.dirty = true;
+	root.dirty = true;
+
+Putchild:
+	ext4_fs_put_inode_ref(&child);
+Putroot:
+	ext4_fs_put_inode_ref(&root);
+	return r;
+}
+
+/* Set up the journal inode when a journal was requested.
+ *
+ * The inode becomes a regular file with mode 0600 and one link, and data
+ * blocks are appended while the loop counter, which starts at the inode's
+ * block count, is below info->journal_blocks. The block at logical index 0
+ * is initialised as a version 2 journal superblock. Finally the inode's
+ * block map is copied into the journal_blocks field of the filesystem
+ * superblock.
+ *
+ * Returns 0 when no journal is wanted or on success, otherwise the first
+ * error met. */
+static int create_journal_inode(struct ext4_fs *fs,
+	struct ext4_mkfs_info *info)
+{
+	struct ext4_inode_ref jref;
+	struct ext4_inode *jino;
+	u64int have;
+	int ret;
+
+	if (!info->journal)
+		return 0;
+
+	ret = ext4_fs_get_inode_ref(fs, EXT4_JOURNAL_INO, &jref);
+	if (ret != 0)
+		return ret;
+
+	jino = jref.inode;
+
+	ext4_inode_set_mode(&fs->sb, jino, EXT4_INODE_MODE_FILE | 0600);
+	ext4_inode_set_links_cnt(jino, 1);
+
+	for (have = ext4_inode_get_blocks_count(&fs->sb, jino);
+	     have < info->journal_blocks; have++) {
+		ext4_fsblk_t phys;
+		ext4_lblk_t logical;
+
+		ret = ext4_fs_append_inode_dblk(&jref, &phys, &logical);
+		if (ret != 0)
+			goto Finish;
+
+		/* Only the first logical block gets initial contents. */
+		if (logical == 0) {
+			struct ext4_block blk;
+			struct jbd_sb *jsb;
+
+			ret = ext4_block_get(fs->bdev, &blk, phys);
+			if (ret != 0)
+				goto Finish;
+
+			jsb = (struct jbd_sb *)blk.data;
+			memset(jsb, 0, sizeof(struct jbd_sb));
+
+			/* Journal fields are stored big-endian. */
+			jsb->header.magic = to_be32(JBD_MAGIC_NUMBER);
+			jsb->header.blocktype = to_be32(JBD_SUPERBLOCK_V2);
+			jsb->blocksize = to_be32(info->block_size);
+			jsb->maxlen = to_be32(info->journal_blocks);
+			jsb->nr_users = to_be32(1);
+			jsb->first = to_be32(1);
+			jsb->sequence = to_be32(1);
+
+			ext4_bcache_set_dirty(blk.buf);
+			ret = ext4_block_set(fs->bdev, &blk);
+			if (ret != 0)
+				goto Finish;
+		}
+	}
+
+	memcpy(fs->sb.journal_blocks, jino->blocks, sizeof(jino->blocks));
+
+	Finish:
+	ext4_fs_put_inode_ref(&jref);
+
+	return ret;
+}
+
+int ext4_mkfs(struct ext4_fs *fs, struct ext4_blockdev *bd, /* create a new filesystem on bd as described by info */
+ struct ext4_mkfs_info *info, int fs_type) /* fs_type 2, 3 or 4 selects the feature masks below */
+{ /* returns 0 on success, else the first error code; zero-valued fields of info are replaced by defaults */
+ int r;
+
+ r = ext4_block_init(bd);
+ if (r != 0)
+ return r;
+
+ bd->fs = fs;
+
+ if (info->len == 0) /* default length: the whole partition */
+ info->len = bd->part_size;
+
+ if (info->block_size == 0)
+ info->block_size = 4096; /*Set block size to default value*/
+
+ /* Round down the filesystem length to be a multiple of the block size */
+ info->len &= ~((u64int)info->block_size - 1);
+
+ if (info->journal_blocks == 0)
+ info->journal_blocks = compute_journal_blocks(info);
+
+ if (info->blocks_per_group == 0)
+ info->blocks_per_group = compute_blocks_per_group(info);
+
+ if (info->inodes == 0)
+ info->inodes = compute_inodes(info);
+
+ if (info->inode_size == 0)
+ info->inode_size = 256;
+
+ info->inodes_per_group = compute_inodes_per_group(info); /* always recomputed, never taken from the caller */
+
+ switch (fs_type) { /* no default case: any other fs_type leaves the masks as the caller set them */
+ case 2:
+ info->feat_compat = EXT2_SUPPORTED_FCOM;
+ info->feat_ro_compat = EXT2_SUPPORTED_FRO_COM;
+ info->feat_incompat = EXT2_SUPPORTED_FINCOM;
+ break;
+ case 3:
+ info->feat_compat = EXT3_SUPPORTED_FCOM;
+ info->feat_ro_compat = EXT3_SUPPORTED_FRO_COM;
+ info->feat_incompat = EXT3_SUPPORTED_FINCOM;
+ break;
+ case 4:
+ info->feat_compat = EXT4_SUPPORTED_FCOM;
+ info->feat_ro_compat = EXT4_SUPPORTED_FRO_COM;
+ info->feat_incompat = EXT4_SUPPORTED_FINCOM;
+ break;
+ }
+
+ /*TODO: handle these features some day...*/
+ info->feat_incompat &= ~EXT4_FINCOM_META_BG;
+ info->feat_incompat &= ~EXT4_FINCOM_FLEX_BG;
+ info->feat_incompat &= ~EXT4_FINCOM_64BIT;
+
+ info->feat_ro_compat &= ~EXT4_FRO_COM_METADATA_CSUM;
+ info->feat_ro_compat &= ~EXT4_FRO_COM_GDT_CSUM;
+ info->feat_ro_compat &= ~EXT4_FRO_COM_DIR_NLINK;
+ info->feat_ro_compat &= ~EXT4_FRO_COM_EXTRA_ISIZE;
+ info->feat_ro_compat &= ~EXT4_FRO_COM_HUGE_FILE;
+
+ if (info->journal)
+ info->feat_compat |= EXT4_FCOM_HAS_JOURNAL;
+
+ if (info->dsc_size == 0) {
+
+ if (info->feat_incompat & EXT4_FINCOM_64BIT) /* 64BIT was cleared above, so the else branch is the one taken */
+ info->dsc_size = EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE;
+ else
+ info->dsc_size = EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE;
+ }
+
+ info->bg_desc_reserve_blocks = 0;
+
+ ext4_dbg(DEBUG_MKFS, DBG_INFO "Creating filesystem with parameters:\n");
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Size: %llud\n", info->len);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Block size: %ud\n",
+ info->block_size);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Blocks per group: %ud\n",
+ info->blocks_per_group);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Inodes per group: %ud\n",
+ info->inodes_per_group);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Inode size: %ud\n",
+ info->inode_size);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Inodes: %ud\n", info->inodes);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Journal blocks: %ud\n",
+ info->journal_blocks);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Features ro_compat: 0x%x\n",
+ info->feat_ro_compat);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Features compat: 0x%x\n",
+ info->feat_compat);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Features incompat: 0x%x\n",
+ info->feat_incompat);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "BG desc reserve: %ud\n",
+ info->bg_desc_reserve_blocks);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Descriptor size: %uhd\n",
+ info->dsc_size);
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "journal: %s\n",
+ info->journal ? "yes" : "no");
+ ext4_dbg(DEBUG_MKFS, DBG_NONE "Label: %s\n", info->label);
+
+ struct ext4_bcache bc; /* block cache used only for the duration of this call */
+
+ memset(&bc, 0, sizeof(struct ext4_bcache));
+ ext4_block_set_lb_size(bd, info->block_size);
+
+ r = ext4_bcache_init_dynamic(&bc, CONFIG_BLOCK_DEV_CACHE_SIZE,
+ info->block_size);
+ if (r != 0)
+ goto block_fini;
+
+ /*Bind block cache to block device*/
+ r = ext4_block_bind_bcache(bd, &bc);
+ if (r != 0)
+ goto cache_fini;
+
+ r = ext4_block_cache_write_back(bd, 1);
+ if (r != 0)
+ goto cache_fini;
+
+ r = mkfs_init(bd, info);
+ if (r != 0)
+ goto cache_fini;
+
+ r = ext4_fs_init(fs, bd, false);
+ if (r != 0)
+ goto cache_fini;
+
+ r = init_bgs(fs); /* from here on a failure must also undo ext4_fs_init */
+ if (r != 0)
+ goto fs_fini;
+
+ r = alloc_inodes(fs);
+ if (r != 0)
+ goto fs_fini;
+
+ r = create_dirs(fs);
+ if (r != 0)
+ goto fs_fini;
+
+ r = create_journal_inode(fs, info);
+ if (r != 0)
+ goto fs_fini;
+
+ fs_fini: /* also reached on success: the labels below run in order as the normal teardown */
+ ext4_fs_fini(fs);
+
+ cache_fini:
+ ext4_block_cache_write_back(bd, 0); /* return value is ignored here */
+ ext4_bcache_fini_dynamic(&bc);
+
+ block_fini:
+ ext4_block_fini(bd);
+
+ return r;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_super.c
@@ -1,0 +1,221 @@
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_debug.h"
+#include "ext4_super.h"
+#include "ext4_crc32.h"
+
+/* Number of block groups: total blocks divided by blocks per group, rounded up. */
+u32int ext4_block_group_cnt(struct ext4_sblock *s)
+{
+	u32int per_group = ext4_get32(s, blocks_per_group);
+	u64int total = ext4_sb_get_blocks_cnt(s);
+	u32int groups;
+
+	groups = (u32int)(total / per_group);
+	if ((total % per_group) != 0)
+		groups += 1;
+	return groups;
+}
+
+/* Blocks held by group bgid: every group but the last is full-sized. */
+u32int ext4_blocks_in_group_cnt(struct ext4_sblock *s, u32int bgid)
+{
+	u32int last = ext4_block_group_cnt(s) - 1;
+	u32int per_group = ext4_get32(s, blocks_per_group);
+	u64int total = ext4_sb_get_blocks_cnt(s);
+
+	if (bgid >= last)
+		return (u32int)(total - (last * per_group));
+	return per_group;
+}
+
+/* Inodes held by group bgid: every group but the last is full-sized. */
+u32int ext4_inodes_in_group_cnt(struct ext4_sblock *s, u32int bgid)
+{
+	u32int last = ext4_block_group_cnt(s) - 1;
+	u32int per_group = ext4_get32(s, inodes_per_group);
+	u32int total = ext4_get32(s, inodes_count);
+
+	if (bgid >= last)
+		return total - (last * per_group);
+	return per_group;
+}
+
+/* Checksum of the superblock bytes that precede its checksum field. */
+static u32int ext4_sb_csum(struct ext4_sblock *s)
+{
+	return ext4_crc32c(EXT4_CRC32_INIT, s,
+			   offsetof(struct ext4_sblock, checksum));
+}
+
+/* True when metadata checksums are off, or type and stored checksum both match. */
+static bool ext4_sb_verify_csum(struct ext4_sblock *s)
+{
+	if (ext4_sb_feature_ro_com(s, EXT4_FRO_COM_METADATA_CSUM)) {
+		if (s->checksum_type != to_le32(EXT4_CHECKSUM_CRC32C))
+			return false;
+		return s->checksum == to_le32(ext4_sb_csum(s));
+	}
+	return true;
+}
+
+/* Refresh the stored checksum; does nothing unless metadata checksums are enabled. */
+void ext4_sb_set_csum(struct ext4_sblock *s)
+{
+	if (ext4_sb_feature_ro_com(s, EXT4_FRO_COM_METADATA_CSUM)) {
+		s->checksum = to_le32(ext4_sb_csum(s));
+	}
+}
+
+/* Update the checksum, then write the superblock at EXT4_SUPERBLOCK_OFFSET. */
+int ext4_sb_write(struct ext4_blockdev *bdev, struct ext4_sblock *s)
+{
+	ext4_sb_set_csum(s);
+	return ext4_block_writebytes(bdev, EXT4_SUPERBLOCK_OFFSET, s, EXT4_SUPERBLOCK_SIZE);
+}
+
+/* Read EXT4_SUPERBLOCK_SIZE bytes at EXT4_SUPERBLOCK_OFFSET into s. */
+int ext4_sb_read(struct ext4_blockdev *bdev, struct ext4_sblock *s)
+{
+	return ext4_block_readbytes(bdev, EXT4_SUPERBLOCK_OFFSET, s, EXT4_SUPERBLOCK_SIZE);
+}
+
+/*
+ * Sanity-check a superblock.  Returns true only when the magic number,
+ * the basic geometry fields, the descriptor size and the checksum
+ * (when that feature is enabled) all pass.
+ */
+bool ext4_sb_check(struct ext4_sblock *s)
+{
+	/* the magic number must match */
+	if (ext4_get16(s, magic) != EXT4_SUPERBLOCK_MAGIC)
+		return false;
+
+	/* none of the counts may be zero */
+	if (ext4_get32(s, inodes_count) == 0 ||
+	    ext4_sb_get_blocks_cnt(s) == 0 ||
+	    ext4_get32(s, blocks_per_group) == 0 ||
+	    ext4_get32(s, inodes_per_group) == 0)
+		return false;
+
+	/* inodes smaller than 128 bytes are rejected */
+	if (ext4_get16(s, inode_size) < 128)
+		return false;
+
+	/* the first inode number must be at least 11 */
+	if (ext4_get32(s, first_inode) < 11)
+		return false;
+
+	/* the descriptor size must lie within the supported range */
+	if (ext4_sb_get_desc_size(s) < EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE ||
+	    ext4_sb_get_desc_size(s) > EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return false;
+
+	/* finally the checksum */
+	return ext4_sb_verify_csum(s) ? true : false;
+}
+
+/* Returns 1 if a equals b raised to some positive integer power, else 0. */
+static inline int is_power_of(u32int a, u32int b)
+{
+	for (;;) {
+		if (a == b)
+			return 1;
+		if (a < b || (a % b) != 0)
+			return 0;
+
+		a /= b;
+	}
+}
+
+/* True for groups 0 and 1 and for groups that are a power of 3, 5 or 7. */
+bool ext4_sb_sparse(u32int group)
+{
+	if (group < 2)
+		return 1;
+	if ((group & 1) == 0)
+		return 0;	/* even groups above 1 never qualify */
+	if (is_power_of(group, 7) || is_power_of(group, 5))
+		return 1;
+	return is_power_of(group, 3) != 0;
+}
+
+/* Without SPARSE_SUPER every group qualifies; with it only sparse groups do. */
+bool ext4_sb_is_super_in_bg(struct ext4_sblock *s, u32int group)
+{
+	if (!ext4_sb_feature_ro_com(s, EXT4_FRO_COM_SPARSE_SUPER))
+		return true;
+	return ext4_sb_sparse(group) ? true : false;
+}
+
+/*
+ * META_BG case: 1 for the first, second and last group of the
+ * meta-group that contains `group`, 0 for every other group.
+ */
+static u32int ext4_bg_num_gdb_meta(struct ext4_sblock *s, u32int group)
+{
+	u32int per_block = ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+	u32int start = (group / per_block) * per_block;
+	u32int end = start + per_block - 1;
+	if (group != start && group != start + 1 && group != end)
+		return 0;
+	return 1;
+}
+
+/*
+ * Non-META_BG case: 0 when the group has no superblock copy; otherwise
+ * first_meta_bg when META_BG is enabled, else the number of blocks
+ * needed to hold one descriptor per block group.
+ */
+static u32int ext4_bg_num_gdb_nometa(struct ext4_sblock *s, u32int group)
+{
+	u32int per_block, needed;
+
+	if (!ext4_sb_is_super_in_bg(s, group))
+		return 0;
+	per_block = ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+	needed = (ext4_block_group_cnt(s) + per_block - 1) / per_block;
+	return ext4_sb_feature_incom(s, EXT4_FINCOM_META_BG) ? ext4_sb_first_meta_bg(s) : needed;
+}
+
+/* Descriptor blocks in `group`, dispatching on whether META_BG applies to it. */
+u32int ext4_bg_num_gdb(struct ext4_sblock *s, u32int group)
+{
+	u32int per_block = ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+	u32int meta_start = ext4_sb_first_meta_bg(s);
+	u32int meta_idx = group / per_block;
+
+	if (ext4_sb_feature_incom(s, EXT4_FINCOM_META_BG) && meta_idx >= meta_start)
+		return ext4_bg_num_gdb_meta(s, group);
+
+	/* META_BG is off, or this group precedes the first meta group */
+	return ext4_bg_num_gdb_nometa(s, group);
+}
+
+/*
+ * Count the base metadata at the start of block_group (superblock copy,
+ * group descriptor blocks and reserved GDT blocks), then scale the sum
+ * using the superblock's log_cluster_size.
+ * NOTE(review): the final shift uses log_cluster_size itself rather than
+ * the difference between the cluster and block size logs - confirm.
+ */
+u32int ext4_num_base_meta_clusters(struct ext4_sblock *s,
+				   u32int block_group)
+{
+	u32int per_block = ext4_sb_get_block_size(s) / ext4_sb_get_desc_size(s);
+	u32int total = ext4_sb_is_super_in_bg(s, block_group);
+	int legacy;
+
+	legacy = !ext4_sb_feature_incom(s, EXT4_FINCOM_META_BG) ||
+		 block_group < ext4_sb_first_meta_bg(s) * per_block;
+	if (!legacy) {
+		total += ext4_bg_num_gdb(s, block_group);
+	} else if (total != 0) {
+		total += ext4_bg_num_gdb(s, block_group);
+		total += ext4_get16(s, s_reserved_gdt_blocks);
+	}
+
+	u32int ratio = (1024 << ext4_get32(s, log_cluster_size)) / ext4_sb_get_block_size(s);
+	return (total + ratio - 1) >> ext4_get32(s, log_cluster_size);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4_trans.c
@@ -1,0 +1,61 @@
+#include "ext4_config.h"
+#include "ext4.h"
+#include "ext4_fs.h"
+#include "ext4_journal.h"
+
+/*
+ * Mark buf dirty.
+ * With a journal and an open transaction the block is handed to the
+ * transaction; otherwise it is flagged dirty in the block cache.
+ */
+int ext4_trans_set_block_dirty(struct ext4_buf *buf)
+{
+	struct ext4_fs *fs = buf->bc->bdev->fs;
+	struct ext4_block blk = { .lb_id = buf->lba, .data = buf->data, .buf = buf };
+
+	if (!fs->jbd_journal || !fs->curr_trans) {
+		/* no transaction in progress: mark it in the cache only */
+		ext4_bcache_set_dirty(buf);
+		return 0;
+	}
+
+	/* journalled path: the current transaction takes the block */
+	return jbd_trans_set_block_dirty(fs->curr_trans, &blk);
+}
+
+/*
+ * Front end to ext4_block_get_noread(): the call is forwarded as is and
+ * its status returned unchanged.
+ */
+int ext4_trans_block_get_noread(struct ext4_blockdev *bdev,
+				struct ext4_block *b,
+				u64int lba)
+{
+	return ext4_block_get_noread(bdev, b, lba);
+}
+
+/*
+ * Front end to ext4_block_get(): the call is forwarded as is and its
+ * status returned unchanged.
+ */
+int ext4_trans_block_get(struct ext4_blockdev *bdev,
+			 struct ext4_block *b,
+			 u64int lba)
+{
+	return ext4_block_get(bdev, b, lba);
+}
+
+/*
+ * Revoke lba in the current transaction when one is open; with a journal
+ * but no open transaction flush the block instead.  Returns 0 otherwise.
+ */
+int ext4_trans_try_revoke_block(struct ext4_blockdev *bdev, u64int lba)
+{
+	struct ext4_fs *fs = bdev->fs;
+
+	if (!fs->jbd_journal)
+		return 0;
+	if (fs->curr_trans)
+		return jbd_trans_try_revoke_block(fs->curr_trans, lba);
+	return ext4_block_flush_lba(fs->bdev, lba);
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/ext4srv.c
@@ -1,0 +1,1050 @@
+#include "ext4_config.h"
+#include "ext4.h"
+#include <fcall.h>
+#include <thread.h>
+#include <9p.h>
+#include <bio.h>
+#include "ext4_inode.h"
+#include "group.h"
+#include "common.h"
+
+#define MIN(a,b) ((a)<(b)?(a):(b)) /* smaller of two values; each argument may be evaluated twice */
+
+int mainstacksize = 65536; /* also passed as the stack size of the cmdsrv proc in rstart */
+
+typedef struct Aux Aux;
+
+struct Aux { /* per-fid state, stored in Fid.aux */
+ Part *p; /* partition returned by openpart; reference counted */
+ u32int uid; /* numeric id of the attached user, or Root */
+ char *path; /* malloc'ed path relative to the partition root; "" for the root itself */
+ int doff; /* index of the next directory entry, maintained by dirgen */
+ union { /* open handle, nil until opened; which member is valid follows type */
+ ext4_file *file;
+ ext4_dir *dir;
+ };
+ int type; /* Adir or Afile */
+};
+
+enum { /* values of Aux.type */
+ Adir,
+ Afile,
+};
+
+static Opts opts = { /* defaults; overridden by the command line in threadmain */
+ .group = nil,
+ .cachewb = 0,
+ .asroot = 0,
+ .rdonly = 0,
+ .linkmode = Lhide,
+
+ .fstype = -1, /* -1: neither a server option nor -M has been seen yet */
+ .blksz = 1024,
+ .label = "",
+ .inodesz = 256,
+ .ninode = 0,
+};
+static u32int Root; /* uid that passes every permission check; set by -R, otherwise 0 */
+static u8int zero[65536]; /* zero bytes rwrite uses to fill a gap before the write offset */
+static char *srvname = "ext4"; /* service name; replaced by the optional command-line argument */
+
+static char * /* returns s itself, a replacement string, or nil; s is freed whenever it is not returned */
+linkresolve(Aux *a, char *s, char **value)
+{ /* with Lresolve and s naming a readable link, replace s by the link's target path */
+ char *q, buf[4096+1];
+ usize sz;
+ int res;
+
+ res = 0;
+ if(opts.linkmode == Lresolve && (res = ext4_readlink(s, buf, sizeof(buf), &sz)) == 0){
+ if(sz == sizeof(buf)){ /* no room left for the terminating 0 */
+ werrstr("readlink: %s: path too long", s);
+ free(s);
+ return nil;
+ }
+
+ buf[sz] = 0;
+ if(value != nil)
+ *value = strdup(buf); /* raw link text, before cleanname; the caller frees it */
+ cleanname(buf);
+ if(buf[0] == '/'){ /* absolute target: re-root it at the partition prefix */
+ free(s);
+ s = smprint("%M%s", a->p, buf);
+ }else{ /* relative target: replace the last component of s */
+ q = strrchr(s, '/');
+ *q = 0;
+ q = s;
+ s = smprint("%s/%s", q, buf); /* NOTE(review): smprint and strdup results are not checked for nil */
+ free(q);
+ cleanname(strchr(s+1, '/')); /* clean only the part after the leading partition component */
+ }
+ }else{
+ if(res != 0) /* readlink failed: errstr is set, but s is still returned unchanged */
+ werrstr("readlink: %s: %r", s);
+ if(value != nil)
+ *value = nil;
+ }
+
+ return s;
+}
+
+static char * /* malloc'ed "%M/path" for the fid, passed through linkresolve; nil on failure */
+fullpath(Aux *a)
+{
+ return linkresolve(a, smprint("%M/%s", a->p, a->path), nil); /* the caller frees the result */
+}
+
+/*
+ * May the fid's user access its file with open mode p (OTRUNC counts as
+ * a write)?  Returns 1 if yes, 0 if no and -1 when the inode cannot be
+ * read; a copy of the inode is stored in *inodeout when it is not nil.
+ */
+static int
+haveperm(Aux *a, int p, struct ext4_inode *inodeout)
+{
+	struct ext4_inode inode;
+	u32int ino, id;
+	int m, fm, mode;
+	Group *g;
+	char *s;
+
+	mode = p;	/* kept: p is turned into access bits below */
+	switch(mode & 3){
+	case OREAD:
+		p = AREAD;
+		break;
+	case OWRITE:
+		p = AWRITE;
+		break;
+	case ORDWR:
+		p = AREAD|AWRITE;
+		break;
+	case OEXEC:
+		p = AEXEC;
+		break;
+	default:
+		return 0;
+	}
+	if(mode & OTRUNC)	/* truncation needs write permission too */
+		p |= AWRITE;
+
+	if((s = fullpath(a)) == nil)
+		return -1;
+	if(ext4_raw_inode_fill(s, &ino, &inode) != 0){
+		werrstr("%s: %r", s);
+		free(s);
+		return -1;
+	}
+	free(s);
+	if(inodeout != nil)
+		memmove(inodeout, &inode, sizeof(inode));
+	fm = ext4_inode_get_mode(a->p->sb, &inode);
+	/* other */
+	m = fm & 7;
+	if((p & m) == p)
+		return 1;
+	/* owner */
+	id = ext4_inode_get_uid(&inode);
+	if(a->uid == Root || ((g = findgroupid(&a->p->groups, id)) != nil && ingroup(g, a->uid))){
+		m |= (fm >> 6) & 7;
+		if((p & m) == p)
+			return 1;
+	}
+	/* group */
+	id = ext4_inode_get_gid(&inode);
+	if(a->uid == Root || ((g = findgroupid(&a->p->groups, id)) != nil && ingroup(g, a->uid))){
+		m |= (fm >> 3) & 7;
+		if((p & m) == p)
+			return 1;
+	}
+	return 0;
+}
+
+static void	/* Tattach: open the partition named by aname and hang a fresh Aux off the fid */
+rattach(Req *r)
+{
+	char err[ERRMAX];
+	Aux *aux;
+	if((aux = calloc(1, sizeof(*aux))) == nil){
+		respond(r, "memory");
+		return;
+	}
+	if((aux->p = openpart(r->ifcall.aname, &opts)) == nil){
+		free(aux);
+		rerrstr(err, sizeof(err));
+		respond(r, err);
+		return;
+	}
+	if(opts.asroot || findgroup(&aux->p->groups, r->ifcall.uname, &aux->uid) == nil)
+		aux->uid = Root;	/* unknown users, or everyone with asroot */
+	incref(aux->p);
+	aux->type = Adir;
+	aux->path = strdup("");
+	r->ofcall.qid = aux->p->qidmask;
+	r->fid->qid = aux->p->qidmask;
+	r->fid->aux = aux;
+	respond(r, nil);
+}
+
+/*
+ * Translate a 9P open mode and create permission bits into the O_* flags
+ * handed to ext4_fopen2(); a non-zero creat adds O_CREAT.
+ */
+static u32int
+toext4mode(u32int mode, u32int perm, int creat)
+{
+	u32int flags;
+
+	flags = (mode & OTRUNC) ? O_TRUNC : 0;
+	switch(mode & 3){
+	case OWRITE:
+		flags |= O_WRONLY;
+		break;
+	case ORDWR:
+		flags |= O_RDWR;
+		break;
+	}
+	if(creat)
+		flags |= O_CREAT;
+	if(perm & DMEXCL)
+		flags |= O_EXCL;
+	if(perm & DMAPPEND)
+		flags |= O_APPEND;
+
+	return flags;
+}
+
+static void /* Topen: check permission, then open a directory or file handle for the fid */
+ropen(Req *r)
+{
+ char *path;
+ int res;
+ Aux *a;
+
+ a = r->fid->aux;
+ switch(a->type){
+ case Adir:
+ if(r->ifcall.mode != OREAD || !haveperm(a, r->ifcall.mode, nil)){ /* directories can only be opened OREAD */
+ respond(r, Eperm);
+ return;
+ }
+ if(a->dir != nil){
+ respond(r, "double open");
+ return;
+ }
+ if((a->dir = malloc(sizeof(*a->dir))) == nil)
+ goto Nomem;
+ if((path = smprint("%M/%s", a->p, a->path)) == nil){
+ free(a->dir);
+ a->dir = nil;
+ goto Nomem;
+ }
+ res = ext4_dir_open(a->dir, path);
+ free(path);
+ if(res != 0){
+ free(a->dir);
+ a->dir = nil;
+ responderror(r);
+ return;
+ }
+ break;
+
+ case Afile:
+ if(!haveperm(a, r->ifcall.mode, nil)){ /* NOTE(review): a -1 (error) result from haveperm also passes this test */
+ respond(r, Eperm);
+ return;
+ }
+ if(a->file != nil){
+ respond(r, "double open");
+ return;
+ }
+ if((a->file = malloc(sizeof(*a->file))) == nil)
+ goto Nomem;
+ if((path = smprint("%M/%s", a->p, a->path)) == nil){
+ free(a->file);
+ a->file = nil;
+ goto Nomem;
+ }
+ res = ext4_fopen2(a->file, path, toext4mode(r->ifcall.mode, 0, 0));
+ free(path);
+ if(res != 0){
+ free(a->file);
+ a->file = nil;
+ responderror(r);
+ return;
+ }
+ break;
+
+Nomem: /* not a case: reached only through the gotos above */
+ respond(r, "memory");
+ return;
+ }
+
+ r->ofcall.iounit = 0;
+
+ respond(r, nil);
+}
+
+/* Tcreate: make r->ifcall.name in the fid's directory; on success the fid is left open on it. */
+static void
+rcreate(Req *r)
+{
+	u32int perm, dirperm, mask, t;
+	int mkdir, made, isopen, res;
+	struct ext4_inode inode;
+	char *s, *q, err[ERRMAX];
+	long tm;
+	Aux *a;
+
+	a = r->fid->aux;
+	s = nil;
+	made = isopen = 0;
+	mkdir = r->ifcall.perm & DMDIR;
+	if(a->file != nil || a->dir != nil){
+		werrstr("double create");
+		goto error;
+	}
+	if((res = haveperm(a, OWRITE, &inode)) <= 0){	/* -1: the inode could not be read */
+		if(res == 0)
+			werrstr(Eperm);
+		goto error;
+	}
+	t = ext4_inode_type(a->p->sb, &inode);	/* first make sure this is a directory */
+	if(t != EXT4_INODE_MODE_DIRECTORY){
+		werrstr("create in non-directory");
+		goto error;
+	}
+	if((s = fullpath(a)) == nil || ext4_mode_get(s, &dirperm) != 0)
+		goto error;
+	if((q = smprint("%s/%s", s, r->ifcall.name)) == nil){
+Nomem:
+		werrstr("memory");
+		goto error;
+	}
+	free(s);
+	s = q;
+	cleanname(s);
+	if(ext4_inode_exist(s, EXT4_DE_UNKNOWN) == 0){	/* the entry must not exist yet */
+		werrstr("file already exists");
+		goto error;
+	}
+	mask = mkdir ? 0777 : 0666;	/* open(5): directories inherit within 0777, files within 0666 */
+	perm = r->ifcall.perm & (~mask | (dirperm & mask));
+	if(mkdir){
+		if(ext4_dir_mk(s) != 0)
+			goto error;
+		made = 1;
+		if((a->dir = malloc(sizeof(*a->dir))) == nil)
+			goto Nomem;
+		if(ext4_dir_open(a->dir, s) < 0){
+			free(a->dir);
+			a->dir = nil;
+			goto error;
+		}
+	}else{
+		if((a->file = malloc(sizeof(*a->file))) == nil)
+			goto Nomem;
+		if(ext4_fopen2(a->file, s, toext4mode(r->ifcall.mode, perm, 1)) < 0){
+			free(a->file);
+			a->file = nil;
+			goto error;
+		}
+		made = 1;
+	}
+	isopen = 1;
+	if(ext4_mode_set(s, perm) < 0)
+		goto error;
+	ext4_owner_set(s, a->uid, a->uid);
+	tm = time(nil);
+	ext4_mtime_set(s, tm);
+	ext4_ctime_set(s, tm);
+	a->type = mkdir ? Adir : Afile;	/* the fid changes type only after full success */
+	r->fid->qid.path = a->p->qidmask.path | a->file->inode;
+	r->fid->qid.vers = 0;
+	r->fid->qid.type = mkdir ? QTDIR : 0;
+	r->ofcall.qid = r->fid->qid;
+	free(a->path);
+	a->path = strdup(strchr(s+1, '/')+1);
+	free(s);
+	r->ofcall.iounit = 0;
+	respond(r, nil);
+	return;
+
+error:
+	rerrstr(err, sizeof(err));	/* saved first: the cleanup below may overwrite errstr */
+	if(isopen && mkdir)
+		ext4_dir_close(a->dir);
+	else if(isopen)
+		ext4_fclose(a->file);
+	if(isopen){
+		free(a->file);
+		a->file = nil;
+	}
+	if(made && mkdir)
+		ext4_dir_rm(s);
+	else if(made)
+		ext4_fremove(s);
+	free(s);
+	respond(r, err);
+}
+
+static int /* fill *dir for the fid itself (path == nil) or for entry `path` inside the fid's directory */
+dirfill(Dir *dir, Aux *a, char *path)
+{ /* returns 0 on success, -1 with errstr set on failure */
+ struct ext4_inode inode;
+ u32int t, ino, id;
+ char tmp[16];
+ char *s, *q;
+ Group *g;
+
+ memset(dir, 0, sizeof(*dir));
+
+ if(path == nil){
+ path = a->path;
+ s = smprint("%M/%s", a->p, a->path);
+ }else{
+ if(*a->path == 0 && *path == 0)
+ path = "/";
+ s = smprint("%M%s%s/%s", a->p, *a->path ? "/" : "", a->path, path);
+ }
+ if((s = linkresolve(a, s, nil)) == nil) /* NOTE(review): the smprint result is not checked for nil before this call */
+ return -1;
+ if(ext4_raw_inode_fill(s, &ino, &inode) < 0){
+ werrstr("inode: %s: %r", s);
+ free(s);
+ return -1;
+ }
+
+ t = ext4_inode_type(a->p->sb, &inode);
+ if(opts.linkmode == Lhide && t == EXT4_INODE_MODE_SOFTLINK){ /* in Lhide mode symlinks are reported as errors */
+ werrstr("softlinks resolving disabled");
+ free(s);
+ return -1;
+ }
+
+ dir->mode = ext4_inode_get_mode(a->p->sb, &inode) & 0x1ff; /* keep only the low nine permission bits */
+ dir->qid.path = a->p->qidmask.path | ino;
+ dir->qid.vers = ext4_inode_get_generation(&inode);
+ dir->qid.type = 0;
+ if(t == EXT4_INODE_MODE_DIRECTORY){
+ dir->qid.type |= QTDIR;
+ dir->mode |= DMDIR;
+ }else
+ dir->length = ext4_inode_get_size(a->p->sb, &inode); /* length stays 0 for directories */
+ if(ext4_inode_get_flags(&inode) & EXT4_INODE_FLAG_APPEND){
+ dir->qid.type |= QTAPPEND;
+ dir->mode |= DMAPPEND;
+ }
+
+ if((q = strrchr(path, '/')) != nil) /* the name is the last path component */
+ path = q+1;
+ dir->name = estrdup9p(path);
+ dir->atime = ext4_inode_get_access_time(&inode);
+ dir->mtime = ext4_inode_get_modif_time(&inode);
+
+ sprint(tmp, "%ud", id = ext4_inode_get_uid(&inode)); /* the decimal id is used when no group entry matches */
+ dir->uid = estrdup9p((g = findgroupid(&a->p->groups, id)) != nil ? g->name : tmp);
+
+ sprint(tmp, "%ud", id = ext4_inode_get_gid(&inode));
+ dir->gid = estrdup9p((g = findgroupid(&a->p->groups, id)) != nil ? g->name : tmp);
+
+ free(s);
+
+ return 0;
+}
+
+static int /* dirread9p callback: fill *dir with the n-th visible entry; 0 on success, -1 at end of directory */
+dirgen(int n, Dir *dir, void *aux)
+{
+ const ext4_direntry *e;
+ Aux *a;
+
+ a = aux;
+ if(n == 0 || n != a->doff){ /* not the next entry in sequence: restart from the beginning */
+ ext4_dir_entry_rewind(a->dir);
+ a->doff = 0;
+ }
+
+ for(;;){
+ do{ /* skip nameless entries and "." / ".." */
+ if((e = ext4_dir_entry_next(a->dir)) == nil)
+ return -1;
+ }while(e->name == nil || strcmp((char*)e->name, ".") == 0 || strcmp((char*)e->name, "..") == 0);
+
+ if(opts.linkmode == Lhide && e->inode_type == EXT4_DE_SYMLINK) /* hidden symlinks do not count as entries */
+ continue;
+
+ if(a->doff++ != n) /* still counting up to entry n after a rewind */
+ continue;
+
+ if(dirfill(dir, a, (char*)e->name) == 0)
+ return 0;
+
+ a->doff--; /* dirfill failed: skip this entry and let the next one take index n */
+ }
+}
+
+/* Tread: directories are served through dirgen, files by seek followed by read. */
+static void
+rread(Req *r)
+{
+	Aux *aux = r->fid->aux;
+	usize got;
+
+	got = 0;
+	if(aux->type == Adir && aux->dir != nil){
+		dirread9p(r, dirgen, aux);
+	}else if(aux->type == Afile && aux->file != nil){
+		/* a failed seek is answered as a zero-byte read */
+		if(ext4_fseek(aux->file, r->ifcall.offset, 0) == 0
+		&& ext4_fread(aux->file, r->ofcall.data, r->ifcall.count, &got) < 0){
+			responderror(r);
+			return;
+		}
+		r->ofcall.count = got;
+	}
+
+	respond(r, nil);
+}
+
+/* Twrite: zero-fill any gap between the end of the file and the offset, then write. */
+static void
+rwrite(Req *r)
+{
+	usize done, gap;
+	Aux *aux;
+
+	aux = r->fid->aux;
+	if(aux->type == Adir){
+		respond(r, "can't write to dir");
+		return;
+	}
+	if(aux->type != Afile)
+		return;
+	while(ext4_fsize(aux->file) < r->ifcall.offset){	/* at most sizeof(zero) bytes per pass */
+		ext4_fseek(aux->file, 0, 2);
+		gap = MIN(r->ifcall.offset-ext4_fsize(aux->file), sizeof(zero));
+		if(ext4_fwrite(aux->file, zero, gap, &done) < 0)
+			goto error;
+	}
+	if(ext4_fseek(aux->file, r->ifcall.offset, 0) < 0
+	|| ext4_fwrite(aux->file, r->ifcall.data, r->ifcall.count, &done) < 0)
+		goto error;
+	r->ofcall.count = done;
+	respond(r, nil);
+	return;
+
+error:
+	responderror(r);
+}
+
+static void /* Tremove: delete the fid's file, or its directory when that is empty */
+rremove(Req *r)
+{
+ struct ext4_inode inode;
+ const ext4_direntry *e;
+ u32int ino, t, empty;
+ ext4_dir dir;
+ Group *g;
+ char *s;
+ Aux *a;
+
+ a = r->fid->aux;
+
+ /* do not resolve links here as most likely it's JUST the link we want to remove */
+ if((s = smprint("%M/%s", a->p, a->path)) == nil){
+ werrstr("memory");
+ goto error;
+ }
+ if(ext4_raw_inode_fill(s, &ino, &inode) < 0)
+ goto error;
+
+ if(a->uid == Root || ((g = findgroupid(&a->p->groups, ext4_inode_get_uid(&inode))) != nil && g->id == a->uid)){ /* only Root or the file's owner may remove */
+ t = ext4_inode_type(a->p->sb, &inode);
+ if(t == EXT4_INODE_MODE_DIRECTORY && ext4_dir_open(&dir, s) == 0){
+ for(empty = 1; empty;){ /* stop at the first entry that is not ".", ".." or nameless */
+ if((e = ext4_dir_entry_next(&dir)) == nil)
+ break;
+ empty = e->name == nil || strcmp((char*)e->name, ".") == 0 || strcmp((char*)e->name, "..") == 0;
+ }
+ ext4_dir_close(&dir);
+ if(!empty){
+ werrstr("directory not empty");
+ goto error;
+ }else if(ext4_dir_rm(s) < 0)
+ goto error;
+ }else if(ext4_fremove(s) < 0) /* also taken for a directory that ext4_dir_open could not open */
+ goto error;
+ }else{
+ werrstr(Eperm);
+ goto error;
+ }
+
+ free(s);
+ respond(r, nil);
+ return;
+
+error:
+ free(s);
+ responderror(r);
+}
+
+/* Tstat: fill r->d for the fid itself (a nil path tells dirfill to use a->path). */
+static void
+rstat(Req *r)
+{
+	if(dirfill(&r->d, r->fid->aux, nil) == 0){
+		respond(r, nil);
+		return;
+	}
+	/* on failure the reply carries the current errstr */
+	responderror(r);
+}
+
+/*
+ * Twstat: rename, truncate and change mode, mtime or group of the fid's
+ * file.  All permission checks are made first; the changes are applied
+ * afterwards, one by one, stopping at the first failure.
+ */
+static void
+rwstat(Req *r)
+{
+	int res, isdir, wrperm, isowner, n;
+	struct ext4_inode inode;
+	char *old, *new, *s, *q;
+	u32int uid, gid;
+	ext4_file f;
+	Aux *a, o;
+	Group *g;
+
+	a = r->fid->aux;
+	old = nil;
+	new = nil;
+
+	/* can't do anything to root, can't change the owner */
+	if(a->path[0] == 0 || (r->d.uid != nil && r->d.uid[0] != 0)){
+		werrstr(Eperm);
+		goto error;
+	}
+	if((old = smprint("%M/%s", a->p, a->path)) == nil){
+		werrstr("memory");
+		goto error;
+	}
+	if((wrperm = haveperm(a, OWRITE, &inode)) < 0)	/* the inode could not be read at all */
+		goto error;
+	uid = ext4_inode_get_uid(&inode);
+	isowner = a->uid == Root || a->uid == uid;
+
+	/* permission to truncate */
+	isdir = ext4_inode_type(a->p->sb, &inode) == EXT4_INODE_MODE_DIRECTORY;
+	if(r->d.length >= 0 && (!wrperm || isdir || !ext4_inode_can_truncate(a->p->sb, &inode))){
+		werrstr(Eperm);
+		goto error;
+	}
+
+	/* permission to rename */
+	if(r->d.name != nil && r->d.name[0] != 0){
+		if((s = strrchr(old, '/')) == nil){
+			werrstr("botched name");
+			goto error;
+		}
+		n = s - old;
+		if((new = malloc(n + 1 + strlen(r->d.name) + 1)) == nil){
+			werrstr("memory");
+			goto error;
+		}
+		memmove(new, old, n);
+		new[n++] = '/';
+		strcpy(new+n, r->d.name);
+
+		/* check parent write permission: cut old at its last slash */
+		*s = 0;
+		q = strchr(old+1, '/');
+		o = *a;
+		o.path = q != nil ? q+1 : "";
+		res = haveperm(&o, OWRITE, nil);
+		*s = '/';
+		if(res <= 0){
+			if(res == 0)
+				werrstr(Eperm);
+			goto error;
+		}
+	}
+
+	/* permission to change mode */
+	if(r->d.mode != ~0){
+		/* has to be owner and can't change dir bit */
+		if(!isowner || (!!isdir != !!(r->d.mode & DMDIR))){
+			werrstr(Eperm);
+			goto error;
+		}
+	}
+
+	/* permission to change mtime */
+	if(r->d.mtime != ~0 && !isowner){
+		werrstr(Eperm);
+		goto error;
+	}
+
+	/* permission to change gid */
+	if(r->d.gid != nil && r->d.gid[0] != 0){
+		/* has to be the owner, group has to exist, must be in that group */
+		if(!isowner || (g = findgroup(&a->p->groups, r->d.gid, &gid)) == nil || !ingroup(g, a->uid)){
+			werrstr(Eperm);
+			goto error;
+		}
+	}
+
+	/* done checking permissions, now apply all the changes and hope it all works */
+	/* rename; from here on old always names the file at its current path */
+	if(new != nil){
+		if(ext4_frename(old, new) < 0)
+			goto error;
+		free(old);
+		old = new;
+		new = nil;
+		free(a->path);
+		a->path = strdup(strchr(old+1, '/')+1);
+	}
+
+	/* truncate */
+	if(r->d.length >= 0){
+		if(ext4_fopen2(&f, old, toext4mode(OWRITE, 0, 0)) < 0)
+			goto error;
+		res = ext4_ftruncate(&f, r->d.length);
+		ext4_fclose(&f);
+		if(res != 0)
+			goto error;
+	}
+
+	/* mode */
+	if(r->d.mode != ~0 && ext4_mode_set(old, r->d.mode & 0x1ff) < 0)
+		goto error;
+	/* mtime */
+	if(r->d.mtime != ~0 && ext4_mtime_set(old, r->d.mtime) < 0)
+		goto error;
+	/* gid */
+	if(r->d.gid != nil && r->d.gid[0] != 0 && ext4_owner_set(old, uid, gid) < 0)
+		goto error;
+
+	free(old);
+	free(new);
+	respond(r, nil);
+	return;
+
+error:
+	free(old);
+	free(new);
+	responderror(r);
+}
+
+static char * /* walk1 hook: step the fid from its directory to `name`; returns nil on success, else an error string */
+rwalk1(Fid *fid, char *name, Qid *qid)
+{
+ static char errbuf[ERRMAX]; /* static because the returned error string must outlive this call */
+ struct ext4_inode inode;
+ u32int ino, t;
+ Aux *a, dir;
+ char *s, *q;
+
+ a = fid->aux;
+
+ /* try walking to the real file first */
+ if((s = fullpath(a)) == nil){
+ /* else try link itself. might want to just remove it anyway */
+ if((s = smprint("%M/%s", a->p, a->path)) == nil)
+ return "memory";
+ }
+ if(ext4_raw_inode_fill(s, &ino, &inode) < 0)
+ goto error;
+ t = ext4_inode_type(a->p->sb, &inode);
+ if(t != EXT4_INODE_MODE_DIRECTORY){ /* the starting point must be a directory */
+ free(s);
+ return "not a directory";
+ }
+ dir = *a; /* temporary Aux naming the resolved directory, used only for the permission check */
+ dir.path = strchr(s+1, '/')+1; /* the part of s after the leading partition component */
+ if(!haveperm(&dir, OEXEC, nil)){
+ free(s);
+ return Eperm;
+ }
+
+ q = s;
+ s = smprint("%s/%s", q, name);
+ cleanname(strchr(s+1, '/'));
+ free(q);
+ if((q = linkresolve(a, s, nil)) == nil){ /* linkresolve has already freed s when it returns nil */
+error:
+ free(s); /* NOTE(review): when reached from the nil case just above, s was already freed by linkresolve - confirm */
+ rerrstr(errbuf, sizeof(errbuf));
+ return errbuf;
+ }
+ s = q;
+ if(ext4_raw_inode_fill(s, &ino, &inode) < 0)
+ goto error;
+ t = ext4_inode_type(a->p->sb, &inode);
+ if(opts.linkmode == Lhide && t == EXT4_INODE_MODE_SOFTLINK){ /* hidden symlinks look as if they did not exist */
+ free(s);
+ return "not found";
+ }
+ qid->type = 0;
+ qid->path = a->p->qidmask.path | ino;
+ qid->vers = ext4_inode_get_generation(&inode);
+ if(t == EXT4_INODE_MODE_DIRECTORY){
+ qid->type |= QTDIR;
+ a->type = Adir;
+ }else
+ a->type = Afile;
+ if(ext4_inode_get_flags(&inode) & EXT4_INODE_FLAG_APPEND)
+ qid->type |= QTAPPEND;
+ free(a->path);
+ a->path = strdup(strchr(s+1, '/')+1); /* the fid now names the new path, relative to the partition */
+ free(s);
+ fid->qid = *qid;
+
+ return nil;
+}
+
+/* Clone hook: newfid gets its own Aux with a private path copy and no open handle. */
+static char *
+rclone(Fid *oldfid, Fid *newfid)
+{
+	Aux *src = oldfid->aux;
+	Aux *copy;
+
+	copy = calloc(1, sizeof(*copy));
+	if(copy == nil)
+		return "memory";
+	*copy = *src;
+	copy->path = strdup(src->path);
+	copy->file = nil;
+	copy->dir = nil;
+	incref(copy->p);
+	newfid->aux = copy;
+
+	return nil;
+}
+
+/* Fid teardown: close any open handle, drop the partition reference, free the Aux. */
+static void
+rdestroyfid(Fid *fid)
+{
+	Aux *aux;
+
+	if((aux = fid->aux) == nil)
+		return;
+	fid->aux = nil;
+	if(aux->type == Afile && aux->file != nil){
+		ext4_fclose(aux->file);
+		free(aux->file);
+	}else if(aux->type == Adir && aux->dir != nil){
+		ext4_dir_close(aux->dir);
+		free(aux->dir);
+	}
+
+	/* closepart runs once decref reports fewer than one reference left */
+	if(decref(aux->p) < 1)
+		closepart(aux->p);
+	free(aux->path);
+	free(aux);
+}
+
+/* Note handler: returns 0 for "sys:" notes; any other note closes all partitions and fd 0. */
+static int
+note(void *, char *s)
+{
+	if(strncmp(s, "sys:", 4) == 0)
+		return 0;
+
+	closeallparts();
+	close(0);
+	return 1;
+}
+
+static void /* proc body: post a command pipe as "#s/<srvname>.cmd" and run the commands read from it */
+cmdsrv(void *)
+{
+ char s[32], *c, *a[4];
+ int f, p[2], n;
+ Biobuf b;
+
+ if(pipe(p) < 0)
+ sysfatal("%r");
+ snprint(s, sizeof(s), "#s/%s.cmd", srvname);
+ if((f = create(s, ORCLOSE|OWRITE, 0660)) < 0){ /* on failure remove a leftover file and retry once */
+ remove(s);
+ if((f = create(s, ORCLOSE|OWRITE, 0660)) < 0)
+ sysfatal("%r");
+ }
+ if(fprint(f, "%d", p[0]) < 1) /* post one end of the pipe; f itself is kept open */
+ sysfatal("srv write");
+
+ dup(p[1], 0); /* the other end becomes fd 0, which Binit reads below */
+ close(p[1]);
+ close(p[0]);
+
+ Binit(&b, 0, OREAD);
+ for(; (c = Brdstr(&b, '\n', 1)) != nil; free(c)){ /* one command per line */
+ if((n = tokenize(c, a, nelem(a))) < 1)
+ continue;
+ USED(n);
+ if(strcmp(a[0], "stats") == 0 || strcmp(a[0], "df") == 0){
+ statallparts();
+ }else if(strcmp(a[0], "halt") == 0){ /* close everything and exit all threads */
+ closeallparts();
+ close(0);
+ threadexitsall(nil);
+ }else if(strcmp(a[0], "sync") == 0){
+ syncallparts();
+ }else{
+ print("unknown command: %s\n", a[0]);
+ }
+ }
+}
+
+static void /* Srv start hook: install the note handler and start the command proc */
+rstart(Srv *)
+{
+ threadnotify(note, 1);
+ proccreate(cmdsrv, nil, mainstacksize); /* cmdsrv runs in its own proc */
+}
+
+static void /* Srv end hook: close every partition and fd 0, then exit all threads */
+rend(Srv *)
+{
+ closeallparts();
+ close(0);
+ threadexitsall(nil);
+}
+
+static Srv fs = { /* 9P handler table passed to threadsrv/threadpostsrv in threadmain */
+ .attach = rattach,
+ .open = ropen,
+ .create = rcreate,
+ .read = rread,
+ .write = rwrite,
+ .remove = rremove,
+ .stat = rstat,
+ .wstat = rwstat,
+ .walk1 = rwalk1, /* walk one path element at a time */
+ .clone = rclone, /* copies the per-fid Aux for a new fid */
+ .destroyfid = rdestroyfid,
+ .start = rstart,
+ .end = rend,
+};
+
+static void /* print both command forms (serve and mkfs) to fd 2 and exit with status "usage" */
+usage(void)
+{
+ fprint(2, "usage: %s [-Clrs] [-g groupfile] [-R uid] [srvname]\n", argv0); /* NOTE(review): -D and -d are parsed by threadmain but not listed here */
+ fprint(2, "mkfs: %s -M (2|3|4) [-L label] [-b blksize] [-N numinodes] [-I inodesize] device\n", argv0);
+ threadexitsall("usage");
+}
+
+void /* entry point: parse options, then either run mkfs mode (-M) or serve 9P */
+threadmain(int argc, char **argv)
+{
+ char *gr;
+ vlong sz;
+ int f, stdio;
+
+ rfork(RFNOTEG);
+
+ stdio = 0;
+ ARGBEGIN{
+ case 'D':
+ chatty9p++;
+nomkfs: /* shared tail of every server-only option: reject it after -M, then mark server mode */
+ if(opts.fstype > 0)
+ usage();
+ opts.fstype = 0;
+ break;
+ case 'd':
+ ext4_dmask_set(strtoul(EARGF(usage()), nil, 0)); /* accepted in both modes */
+ break;
+ case 'C':
+ opts.cachewb = 1;
+ goto nomkfs;
+ case 'l':
+ opts.linkmode = Lresolve;
+ goto nomkfs;
+ case 'g': /* read the whole group file into opts.group as a 0-terminated string */
+ gr = EARGF(usage());
+ if((f = open(gr, OREAD)) < 0)
+ sysfatal("%r");
+ sz = seek(f, 0, 2);
+ if(sz < 0)
+ sysfatal("%s: invalid group file", gr);
+ if((opts.group = malloc(sz+1)) == nil)
+ sysfatal("memory");
+ seek(f, 0, 0);
+ if(readn(f, opts.group, sz) != sz)
+ sysfatal("%s: read failed", gr);
+ close(f);
+ opts.group[sz] = 0;
+ goto nomkfs;
+ case 'R': /* every attach then runs as the given uid (see rattach) */
+ opts.asroot = 1;
+ Root = atoll(EARGF(usage()));
+ goto nomkfs;
+ case 'r':
+ opts.rdonly = 1;
+ goto nomkfs;
+ case 's':
+ stdio = 1;
+ goto nomkfs;
+ case 'M':
+ if(!opts.fstype) /* fstype 0 means a server-only option was already seen */
+ usage();
+ opts.fstype = atoi(EARGF(usage()));
+ if(opts.fstype < 2 || opts.fstype > 4)
+ usage();
+ break;
+
+ case 'b':
+ opts.blksz = atoi(EARGF(usage()));
+ if(opts.blksz != 1024 && opts.blksz != 2048 && opts.blksz != 4096)
+ usage();
+yesmkfs: /* shared tail of every mkfs-only option: -M must have come first */
+ if(opts.fstype < 1)
+ usage();
+ break;
+ case 'L':
+ opts.label = EARGF(usage());
+ goto yesmkfs;
+ case 'I':
+ opts.inodesz = atoi(EARGF(usage()));
+ if(opts.inodesz < 128 || ((opts.inodesz-1) & opts.inodesz) != 0) /* at least 128 and a power of two */
+ usage();
+ goto yesmkfs;
+ case 'N':
+ opts.ninode = atoi(EARGF(usage()));
+ if(opts.ninode < 1)
+ usage();
+ goto yesmkfs;
+
+ default:
+ usage();
+ }ARGEND
+
+ if(opts.fstype > 1){ /* mkfs mode: exactly one device argument; presumably openpart formats it when fstype is set - confirm */
+ if(argc != 1)
+ usage();
+ if(openpart(argv[0], &opts) == nil)
+ sysfatal("%r");
+ closeallparts();
+ threadexitsall(nil);
+ }else{ /* server mode: optional service name, not allowed together with -s */
+ if(!stdio && argc == 1)
+ srvname = *argv;
+ else if(argc != 0)
+ usage();
+
+ if(stdio){ /* serve 9P on fds 0 and 1 instead of posting a service */
+ fs.infd = 0;
+ fs.outfd = 1;
+ threadsrv(&fs);
+ }else
+ threadpostsrv(&fs, srvname);
+ threadexits(nil);
+ }
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/group.c
@@ -1,0 +1,130 @@
+#include <u.h>
+#include <libc.h>
+#include "group.h"
+
+/*
+ * Parse Unix group-file text ("name:passwd:gid:member,member", one record per line) into gs.
+ * raw is copied, not kept. Returns 0, or -1 with errstr set and gs left empty.
+ */
+int
+loadgroups(Groups *gs, char *raw)
+{
+	char *m, *s, *e, *a[5], *ide;
+	Group *g, *memb;
+	int line, n, k;
+	vlong id;
+
+	memset(gs, 0, sizeof(*gs));
+	if((gs->raw = strdup(raw)) == nil)
+		goto error;
+	line = 1;
+	for(s = gs->raw; *s; s = e+1, line++){
+		if((e = strchr(s, '\n')) != nil)
+			*e = 0;
+		n = getfields(s, a, nelem(a), 0, ":");	/* 0: keep empty fields so "name::gid" still has the gid in a[2] */
+		if(n < 3 || strlen(a[0]) == 0 || strlen(a[2]) == 0){
+			werrstr("line %d: invalid record", line);
+			goto error;
+		}
+		id = strtoll(a[2], &ide, 0);
+		if(id < 0 || id > 0xffffffff || *ide != 0){
+			werrstr("invalid gid: %s", a[2]);
+			goto error;
+		}
+		if((g = realloc(gs->g, (gs->ng+1)*sizeof(Group))) == nil)
+			goto error;
+		gs->g = g;
+		g += gs->ng++;
+		memset(g, 0, sizeof(*g));
+		g->id = id;
+		g->name = a[0];
+		/* split the member list in place; a[3] is only set when n > 3 */
+		for(m = n > 3 ? a[3] : nil; m != nil && *m; *m++ = 0){
+			if((memb = realloc(g->memb, (g->nmemb+1)*sizeof(Group))) == nil)
+				goto error;
+			g->memb = memb;
+			memb += g->nmemb++;
+			memset(memb, 0, sizeof(*memb));
+			memb->name = m;
+			if((m = strchr(m, ',')) == nil)
+				break;
+		}
+		if(e == nil)
+			break;
+	}
+	/* all records are in; resolve member names to ids (findgroup stores ~0 for unknown names) */
+	g = gs->g;
+	for(n = 0; n < gs->ng; n++, g++){
+		for(k = 0, memb = g->memb; k < g->nmemb; k++, memb++)
+			findgroup(gs, memb->name, &memb->id);
+	}
+	return 0;
+error:
+	werrstr("togroups: %r");
+	freegroups(gs);
+	memset(gs, 0, sizeof(*gs));	/* leave no dangling pointers for a later freegroups */
+	return -1;
+}
+
+/* Release all memory owned by gs and zero it, so that calling this twice is harmless. */
+void
+freegroups(Groups *gs)
+{
+	while(gs->ng > 0)
+		free(gs->g[--gs->ng].memb);
+	free(gs->g);
+	free(gs->raw);
+	memset(gs, 0, sizeof(*gs));
+}
+
+/*
+ * Look a group up by name.
+ * When id is not nil it receives the group's id, or ~0 if no group
+ * has that name. Returns the matching entry, or nil.
+ */
+Group *
+findgroup(Groups *gs, char *name, u32int *id)
+{
+	Group *hit;
+	int k;
+
+	hit = nil;
+	for(k = 0; k < gs->ng && hit == nil; k++)
+		if(strcmp(gs->g[k].name, name) == 0)
+			hit = &gs->g[k];
+
+	if(id != nil)
+		*id = hit != nil ? hit->id : ~0;
+	return hit;
+}
+
+/*
+ * Look a group up by numeric id.
+ * Returns the first entry whose id matches, or nil if there is none.
+ */
+Group *
+findgroupid(Groups *gs, u32int id)
+{
+	int k;
+
+	for(k = 0; k < gs->ng; k++)
+		if(gs->g[k].id == id)
+			return &gs->g[k];
+	return nil;
+}
+
+/*
+ * Return 1 if id is the id of group g itself or of one of its members, else 0.
+ */
+int
+ingroup(Group *g, u32int id)
+{
+	int k;
+
+	if(g->id == id)
+		return 1;
+	for(k = 0; k < g->nmemb; k++)
+		if(g->memb[k].id == id)
+			return 1;
+	return 0;
+}
--- /dev/null
+++ b/sys/src/cmd/ext4srv/group.h
@@ -1,0 +1,21 @@
+typedef struct Group Group;
+typedef struct Groups Groups;
+/* One group record; also used for each member entry, where loadgroups fills in only name and id. */
+struct Group {
+ u32int id; /* numeric id; for a member entry, ~0 when its name matches no loaded group */
+ char *name; /* points into Groups.raw, not separately allocated */
+ Group *memb; /* realloc'ed array of nmemb member entries, nil when empty */
+ int nmemb;
+};
+/* A parsed group file as built by loadgroups. */
+struct Groups {
+ char *raw; /* strdup'ed copy of the file text, split in place during parsing */
+ Group *g; /* realloc'ed array of ng groups */
+ int ng;
+};
+
+int loadgroups(Groups *gs, char *raw); /* 0 on success, -1 on error */
+void freegroups(Groups *gs);
+Group *findgroup(Groups *gs, char *name, u32int *id); /* nil if not found; id may be nil */
+Group *findgroupid(Groups *gs, u32int id); /* nil if not found */
+int ingroup(Group *g, u32int id); /* 1 if id is g or one of its members, else 0 */
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4.h
@@ -1,0 +1,529 @@
+#pragma once
+
+#include "ext4_types.h"
+#include "ext4_debug.h"
+#include "ext4_blockdev.h"
+
+#pragma incomplete struct ext4_mountpoint
+
+/********************************OS LOCK INTERFACE***************************/
+
+/**@brief OS dependent lock interface.*/
+struct ext4_lock {
+
+ /**@brief Lock access to mount point.*/
+ void (*lock)(void *aux);
+
+ /**@brief Unlock access to mount point.*/
+ void (*unlock)(void *aux);
+
+ /**@brief Auxiliary pointer (presumably what lock/unlock receive as aux - confirm).*/
+ void *p_user;
+};
+
+/********************************FILE DESCRIPTOR*****************************/
+
+/**@brief File descriptor. */
+typedef struct ext4_file {
+
+ /**@brief Mount point handle.*/
+ struct ext4_mountpoint *mp;
+
+ /**@brief File inode id.*/
+ u32int inode;
+
+ /**@brief Open flags.*/
+ u32int flags;
+
+ /**@brief File size.*/
+ u64int fsize;
+
+ /**@brief Actual file position.*/
+ u64int fpos;
+} ext4_file;
+
+/*****************************DIRECTORY DESCRIPTOR***************************/
+
+/**@brief Directory entry descriptor. */
+typedef struct ext4_direntry {
+ u32int inode; /* presumably the inode number the entry refers to - confirm */
+ u16int entry_length;
+ u8int name_length; /* presumably the number of valid bytes in name - confirm */
+ u8int inode_type; /* presumably one of the EXT4_DE_* values listed for ext4_inode_exist - confirm */
+ u8int name[255]; /* NOTE(review): no spare byte for a terminating NUL if a name can use all 255 - confirm */
+} ext4_direntry;
+
+/**@brief Directory descriptor. */
+typedef struct ext4_dir {
+ /**@brief File descriptor.*/
+ ext4_file f;
+ /**@brief Current directory entry.*/
+ ext4_direntry de;
+ /**@brief Next entry offset.*/
+ u64int next_off;
+} ext4_dir;
+
+/********************************MOUNT OPERATIONS****************************/
+
+/**@brief Register block device.
+ *
+ * @param bd Block device.
+ * @param dev_name Block device name.
+ *
+ * @return Standard error code.*/
+int ext4_device_register(struct ext4_blockdev *bd,
+ const char *dev_name);
+
+/**@brief Un-register block device.
+ *
+ * @param dev_name Block device name.
+ *
+ * @return Standard error code.*/
+int ext4_device_unregister(const char *dev_name);
+
+/**@brief Un-register all block devices.
+ *
+ * @return Standard error code.*/
+int ext4_device_unregister_all(void);
+
+/**@brief Mount a block device with EXT4 partition to the mount point.
+ *
+ * @param dev_name Block device name (@ref ext4_device_register).
+ * @param mount_point Mount point, for example:
+ * - /
+ * - /my_partition/
+ * - /my_second_partition/
+ * @param read_only mount as read-only mode.
+ *
+ * @return Standard error code */
+int ext4_mount(const char *dev_name,
+ const char *mount_point,
+ bool read_only);
+
+/**@brief Umount operation.
+ *
+ * @param mount_point Mount point.
+ *
+ * @return Standard error code */
+int ext4_umount(const char *mount_point);
+
+/**@brief Starts journaling. Journaling start/stop functions are transparent
+ * and might be used on filesystems without journaling support.
+ * @warning Usage:
+ * ext4_mount("sda1", "/");
+ * ext4_journal_start("/");
+ *
+ * //File operations here...
+ *
+ * ext4_journal_stop("/");
+ * ext4_umount("/");
+ * @param mount_point Mount point.
+ *
+ * @return Standard error code. */
+int ext4_journal_start(const char *mount_point);
+
+/**@brief Stops journaling. Journaling start/stop functions are transparent
+ * and might be used on filesystems without journaling support.
+ *
+ * @param mount_point Mount point name.
+ *
+ * @return Standard error code. */
+int ext4_journal_stop(const char *mount_point);
+
+/**@brief Journal recovery.
+ * @warning Must be called after @ref ext4_mount.
+ *
+ * @param mount_point Mount point.
+ *
+ * @return Standard error code. */
+int ext4_recover(const char *mount_point);
+
+/**@brief Some of the filesystem stats. */
+struct ext4_mount_stats {
+ u32int inodes_count;
+ u32int free_inodes_count;
+ u64int blocks_count;
+ u64int free_blocks_count;
+
+ u32int block_size;
+ u32int block_group_count;
+ u32int blocks_per_group;
+ u32int inodes_per_group;
+
+ char volume_name[16];
+};
+
+/**@brief Get file mount point stats.
+ *
+ * @param mount_point Mount point.
+ * @param stats Filesystem stats.
+ *
+ * @return Standard error code. */
+int ext4_mount_point_stats(const char *mount_point,
+ struct ext4_mount_stats *stats);
+
+/**@brief Setup OS lock routines.
+ *
+ * @param mount_point Mount point.
+ * @param locks Lock and unlock functions
+ *
+ * @return Standard error code. */
+int ext4_mount_setup_locks(const char *mount_point,
+ const struct ext4_lock *locks);
+
+/**@brief Acquire the filesystem superblock pointer of a mp.
+ *
+ * @param mount_point Mount point.
+ * @param sb Superblock handle
+ *
+ * @return Standard error code. */
+int ext4_get_sblock(const char *mount_point, struct ext4_sblock **sb);
+
+/**@brief Enable/disable write back cache mode.
+ * @warning Default model of cache is write through. It means that when you do:
+ *
+ * ext4_fopen(...);
+ * ext4_fwrite(...);
+ * < --- data is flushed to physical drive
+ *
+ * When you do:
+ * ext4_cache_write_back(..., 1);
+ * ext4_fopen(...);
+ * ext4_fwrite(...);
+ * < --- data is NOT flushed to physical drive
+ * ext4_cache_write_back(..., 0);
+ * < --- when write back mode is disabled all
+ * cache data will be flushed
+ * To enable write back mode permanently just call this function
+ * once after ext4_mount (and disable before ext4_umount).
+ *
+ * Some of the function use write back cache mode internally.
+ * If you enable write back mode twice you have to disable it twice
+ * to flush all data:
+ *
+ * ext4_cache_write_back(..., 1);
+ * ext4_cache_write_back(..., 1);
+ *
+ * ext4_cache_write_back(..., 0);
+ * ext4_cache_write_back(..., 0);
+ *
+ * Write back mode is useful when you want to create a lot of empty
+ * files/directories.
+ *
+ * @param path Path.
+ * @param on Enable/disable cache writeback mode.
+ *
+ * @return Standard error code. */
+int ext4_cache_write_back(const char *path, bool on);
+
+
+/**@brief Force cache flush.
+ *
+ * @param path Path.
+ *
+ * @return Standard error code. */
+int ext4_cache_flush(const char *path);
+
+/********************************FILE OPERATIONS*****************************/
+
+/**@brief Remove file by path.
+ *
+ * @param path Path to file.
+ *
+ * @return Standard error code. */
+int ext4_fremove(const char *path);
+
+/**@brief Create a hardlink for a file.
+ *
+ * @param path Path to file.
+ * @param hardlink_path Path of hardlink.
+ *
+ * @return Standard error code. */
+int ext4_flink(const char *path, const char *hardlink_path);
+
+/**@brief Rename file.
+ * @param path Source.
+ * @param new_path Destination.
+ * @return Standard error code. */
+int ext4_frename(const char *path, const char *new_path);
+
+/**@brief File open function.
+ *
+ * @param file File handle.
+ * @param path File path, has to start from mount point:/my_partition/file.
+ * @param flags File open flags.
+ * |---------------------------------------------------------------|
+ * | r or rb O_RDONLY |
+ * |---------------------------------------------------------------|
+ * | w or wb O_WRONLY|O_CREAT|O_TRUNC |
+ * |---------------------------------------------------------------|
+ * | a or ab O_WRONLY|O_CREAT|O_APPEND |
+ * |---------------------------------------------------------------|
+ * | r+ or rb+ or r+b O_RDWR |
+ * |---------------------------------------------------------------|
+ * | w+ or wb+ or w+b O_RDWR|O_CREAT|O_TRUNC |
+ * |---------------------------------------------------------------|
+ * | a+ or ab+ or a+b O_RDWR|O_CREAT|O_APPEND |
+ * |---------------------------------------------------------------|
+ *
+ * @return Standard error code.*/
+int ext4_fopen(ext4_file *file, const char *path, const char *flags);
+
+/**@brief Alternate file open function.
+ *
+ * @param file File handle.
+ * @param path File path, has to start from mount point:/my_partition/file.
+ * @param flags File open flags.
+ *
+ * @return Standard error code.*/
+int ext4_fopen2(ext4_file *file, const char *path, int flags);
+
+/**@brief File close function.
+ *
+ * @param file File handle.
+ *
+ * @return Standard error code.*/
+int ext4_fclose(ext4_file *file);
+
+
+/**@brief File truncate function.
+ *
+ * @param file File handle.
+ * @param size New file size.
+ *
+ * @return Standard error code.*/
+int ext4_ftruncate(ext4_file *file, u64int size);
+
+/**@brief Read data from file.
+ *
+ * @param file File handle.
+ * @param buf Output buffer.
+ * @param size Bytes to read.
+ * @param rcnt Bytes read (nil allowed).
+ *
+ * @return Standard error code.*/
+int ext4_fread(ext4_file *file, void *buf, usize size, usize *rcnt);
+
+/**@brief Write data to file.
+ *
+ * @param file File handle.
+ * @param buf Data to write
+ * @param size Write length.
+ * @param wcnt Bytes written (nil allowed).
+ *
+ * @return Standard error code.*/
+int ext4_fwrite(ext4_file *file, const void *buf, usize size, usize *wcnt);
+
+/**@brief File seek operation.
+ *
+ * @param file File handle.
+ * @param offset Offset to seek.
+ * @param origin Seek type:
+ * @ref SEEK_SET
+ * @ref SEEK_CUR
+ * @ref SEEK_END
+ *
+ * @return Standard error code.*/
+int ext4_fseek(ext4_file *file, s64int offset, u32int origin);
+
+/**@brief Get file position.
+ *
+ * @param file File handle.
+ *
+ * @return Actual file position */
+u64int ext4_ftell(ext4_file *file);
+
+/**@brief Get file size.
+ *
+ * @param file File handle.
+ *
+ * @return File size. */
+u64int ext4_fsize(ext4_file *file);
+
+
+/**@brief Get inode of file/directory/link.
+ *
+ * @param path Path to file/dir/link.
+ * @param ret_ino Inode number.
+ * @param inode Inode internals.
+ *
+ * @return Standard error code.*/
+int ext4_raw_inode_fill(const char *path, u32int *ret_ino,
+ struct ext4_inode *inode);
+
+/**@brief Check if inode exists.
+ *
+ * @param path Path to file/dir/link.
+ * @param type Inode type.
+ * @ref EXT4_DE_UNKNOWN
+ * @ref EXT4_DE_REG_FILE
+ * @ref EXT4_DE_DIR
+ * @ref EXT4_DE_CHRDEV
+ * @ref EXT4_DE_BLKDEV
+ * @ref EXT4_DE_FIFO
+ * @ref EXT4_DE_SOCK
+ * @ref EXT4_DE_SYMLINK
+ *
+ * @return Standard error code.*/
+int ext4_inode_exist(const char *path, int type);
+
+/**@brief Change file/directory/link mode bits.
+ *
+ * @param path Path to file/dir/link.
+ * @param mode New mode bits (for example 0777).
+ *
+ * @return Standard error code.*/
+int ext4_mode_set(const char *path, u32int mode);
+
+
+/**@brief Get file/directory/link mode bits.
+ *
+ * @param path Path to file/dir/link.
+ * @param mode Receives the current mode bits (for example 0777).
+ *
+ * @return Standard error code.*/
+int ext4_mode_get(const char *path, u32int *mode);
+
+/**@brief Change file owner and group.
+ *
+ * @param path Path to file/dir/link.
+ * @param uid User id.
+ * @param gid Group id.
+ *
+ * @return Standard error code.*/
+int ext4_owner_set(const char *path, u32int uid, u32int gid);
+
+/**@brief Get file/directory/link owner and group.
+ *
+ * @param path Path to file/dir/link.
+ * @param uid User id.
+ * @param gid Group id.
+ *
+ * @return Standard error code.*/
+int ext4_owner_get(const char *path, u32int *uid, u32int *gid);
+
+/**@brief Set file/directory/link access time.
+ *
+ * @param path Path to file/dir/link.
+ * @param atime Access timestamp.
+ *
+ * @return Standard error code.*/
+int ext4_atime_set(const char *path, u32int atime);
+
+/**@brief Set file/directory/link modify time.
+ *
+ * @param path Path to file/dir/link.
+ * @param mtime Modify timestamp.
+ *
+ * @return Standard error code.*/
+int ext4_mtime_set(const char *path, u32int mtime);
+
+/**@brief Set file/directory/link change time.
+ *
+ * @param path Path to file/dir/link.
+ * @param ctime Change timestamp.
+ *
+ * @return Standard error code.*/
+int ext4_ctime_set(const char *path, u32int ctime);
+
+/**@brief Get file/directory/link access time.
+ *
+ * @param path Path to file/dir/link.
+ * @param atime Access timestamp.
+ *
+ * @return Standard error code.*/
+int ext4_atime_get(const char *path, u32int *atime);
+
+/**@brief Get file/directory/link modify time.
+ *
+ * @param path Path to file/dir/link.
+ * @param mtime Modify timestamp.
+ *
+ * @return Standard error code.*/
+int ext4_mtime_get(const char *path, u32int *mtime);
+
+/**@brief Get file/directory/link change time.
+ *
+ * @param path Path to file/dir/link.
+ * @param ctime Change timestamp.
+ *
+ * @return standard error code*/
+int ext4_ctime_get(const char *path, u32int *ctime);
+
+/**@brief Create symbolic link.
+ *
+ * @param target Destination entry path.
+ * @param path Source entry path.
+ *
+ * @return Standard error code.*/
+int ext4_fsymlink(const char *target, const char *path);
+
+/**@brief Create special file.
+ * @param path Path to new special file.
+ * @param filetype Filetype of the new special file.
+ * (that must not be regular file, directory, or unknown type)
+ * @param dev If filetype is char device or block device,
+ * the device number will become the payload in the inode.
+ * @return Standard error code.*/
+int ext4_mknod(const char *path, int filetype, u32int dev);
+
+/**@brief Read symbolic link payload.
+ *
+ * @param path Path to symlink.
+ * @param buf Output buffer.
+ * @param bufsize Output buffer max size.
+ * @param rcnt Bytes read.
+ *
+ * @return Standard error code.*/
+int ext4_readlink(const char *path, char *buf, usize bufsize, usize *rcnt);
+
+/*********************************DIRECTORY OPERATION***********************/
+
+/**@brief Recursive directory remove.
+ *
+ * @param path Directory path to remove
+ *
+ * @return Standard error code.*/
+int ext4_dir_rm(const char *path);
+
+/**@brief Rename/move directory.
+ *
+ * @param path Source path.
+ * @param new_path Destination path.
+ *
+ * @return Standard error code. */
+int ext4_dir_mv(const char *path, const char *new_path);
+
+/**@brief Create new directory.
+ *
+ * @param path Directory name.
+ *
+ * @return Standard error code.*/
+int ext4_dir_mk(const char *path);
+
+/**@brief Directory open.
+ *
+ * @param dir Directory handle.
+ * @param path Directory path.
+ *
+ * @return Standard error code.*/
+int ext4_dir_open(ext4_dir *dir, const char *path);
+
+/**@brief Directory close.
+ *
+ * @param dir directory handle.
+ *
+ * @return Standard error code.*/
+int ext4_dir_close(ext4_dir *dir);
+
+/**@brief Return next directory entry.
+ *
+ * @param dir Directory handle.
+ *
+ * @return Directory entry id (nil if no entry)*/
+const ext4_direntry *ext4_dir_entry_next(ext4_dir *dir);
+
+/**@brief Rewind directory entry offset.
+ *
+ * @param dir Directory handle.*/
+void ext4_dir_entry_rewind(ext4_dir *dir);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_balloc.h
@@ -1,0 +1,62 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_fs.h"
+
+/**@brief Compute number of block group from block address.
+ * @param s superblock pointer.
+ * @param baddr Absolute address of block.
+ * @return Block group index
+ */
+u32int ext4_balloc_get_bgid_of_block(struct ext4_sblock *s,
+ ext4_fsblk_t baddr);
+
+/**@brief Compute the starting block address of a block group
+ * @param s superblock pointer.
+ * @param bgid block group index
+ * @return Block address
+ */
+ext4_fsblk_t ext4_balloc_get_block_of_bgid(struct ext4_sblock *s,
+ u32int bgid);
+
+/**@brief Calculate and set checksum of block bitmap.
+ * @param sb superblock pointer.
+ * @param bg block group
+ * @param bitmap bitmap buffer
+ */
+void ext4_balloc_set_bitmap_csum(struct ext4_sblock *sb,
+ struct ext4_bgroup *bg,
+ void *bitmap);
+
+/**@brief Free block from inode.
+ * @param inode_ref inode reference
+ * @param baddr block address
+ * @return standard error code*/
+int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref,
+ ext4_fsblk_t baddr);
+
+/**@brief Free blocks from inode.
+ * @param inode_ref inode reference
+ * @param first block address
+ * @param count block count
+ * @return standard error code*/
+int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref,
+ ext4_fsblk_t first, u32int count);
+
+/**@brief Allocate block procedure.
+ * @param inode_ref inode reference
+ * @param goal
+ * @param baddr allocated block address
+ * @return standard error code*/
+int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref,
+ ext4_fsblk_t goal,
+ ext4_fsblk_t *baddr);
+
+/**@brief Try allocate selected block.
+ * @param inode_ref inode reference
+ * @param baddr block address to allocate
+ * @param free if baddr is not allocated
+ * @return standard error code*/
+int ext4_balloc_try_alloc_block(struct ext4_inode_ref *inode_ref,
+ ext4_fsblk_t baddr, bool *free);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_bcache.h
@@ -1,0 +1,240 @@
+#pragma once
+
+#include "tree.h"
+#include "queue.h"
+
+#define EXT4_BLOCK_ZERO() \
+ {0}
+
+/**@brief Block cache buffer descriptor*/
+struct ext4_buf {
+ /**@brief Flags: bits numbered by enum bcache_state_bits*/
+ int flags;
+
+ /**@brief Logical block address*/
+ u64int lba;
+
+ /**@brief Data buffer.*/
+ u8int *data;
+
+ /**@brief LRU priority. (unused) */
+ u32int lru_prio;
+
+ /**@brief LRU id.*/
+ u32int lru_id;
+
+ /**@brief Reference counter*/
+ u32int refctr;
+
+ /**@brief The block cache this buffer belongs to. */
+ struct ext4_bcache *bc;
+
+ /**@brief Whether or not buffer is on dirty list.*/
+ bool on_dirty_list;
+
+ /**@brief LBA tree node*/
+ RB_ENTRY(ext4_buf) lba_node;
+
+ /**@brief LRU tree node*/
+ RB_ENTRY(ext4_buf) lru_node;
+
+ /**@brief Dirty list node*/
+ SLIST_ENTRY(ext4_buf) dirty_node;
+
+ /**@brief Callback routine after a disk-write operation.
+ * @param bc block cache descriptor
+ * @param buf buffer descriptor
+ * @param res standard error code returned by bdev->bwrite()
+ * @param arg argument passed to this routine*/
+ void (*end_write)(struct ext4_bcache *bc,
+ struct ext4_buf *buf,
+ int res,
+ void *arg);
+
+ /**@brief argument passed to end_write() callback.*/
+ void *end_write_arg;
+};
+
+/**@brief Block handle: a logical block id together with its cache buffer*/
+struct ext4_block {
+ /**@brief Logical block ID*/
+ u64int lb_id;
+
+ /**@brief Cache buffer descriptor for this block */
+ struct ext4_buf *buf;
+
+ /**@brief Data buffer (presumably the same memory as buf->data - confirm).*/
+ u8int *data;
+};
+
+/**@brief Block cache descriptor*/
+struct ext4_bcache {
+
+ /**@brief Item count in block cache*/
+ u32int cnt;
+
+ /**@brief Item size in block cache*/
+ u32int itemsize;
+
+ /**@brief Last recently used counter*/
+ u32int lru_ctr;
+
+ /**@brief Currently referenced datablocks*/
+ u32int ref_blocks;
+
+ /**@brief Maximum referenced datablocks*/
+ u32int max_ref_blocks;
+
+ /**@brief The blockdev bound to this block cache*/
+ struct ext4_blockdev *bdev;
+
+ /**@brief The cache should not be shaken */
+ bool dont_shake;
+
+ /**@brief A tree holding all bufs*/
+ RB_HEAD(ext4_buf_lba, ext4_buf) lba_root;
+
+ /**@brief A tree holding unreferenced bufs*/
+ RB_HEAD(ext4_buf_lru, ext4_buf) lru_root;
+
+ /**@brief A singly-linked list holding dirty buffers*/
+ SLIST_HEAD(ext4_buf_dirty, ext4_buf) dirty_list;
+};
+
+/**@brief buffer state bits
+ *
+ * - BC_UPTODATE: Buffer contains valid data.
+ * - BC_DIRTY: Buffer is dirty.
+ * - BC_FLUSH: Buffer will be immediately flushed,
+ * when no one references it.
+ * - BC_TMP: Buffer will be dropped once its refctr
+ * reaches zero.
+ */
+enum bcache_state_bits {
+ BC_UPTODATE,
+ BC_DIRTY,
+ BC_FLUSH,
+ BC_TMP
+};
+
+/* Set, clear and test bit b (a bcache_state_bits value) in buf->flags.
+ * Each expansion is fully parenthesized so it can be used inside a larger
+ * expression without precedence surprises. */
+#define ext4_bcache_set_flag(buf, b) ((buf)->flags |= 1 << (b))
+
+#define ext4_bcache_clear_flag(buf, b) ((buf)->flags &= ~(1 << (b)))
+
+#define ext4_bcache_test_flag(buf, b) (((buf)->flags & (1 << (b))) >> (b))
+
+/**@brief Set both BC_UPTODATE and BC_DIRTY on buf.*/
+static inline void ext4_bcache_set_dirty(struct ext4_buf *buf) {
+	buf->flags |= (1 << BC_UPTODATE) | (1 << BC_DIRTY);
+}
+
+/**@brief Clear both BC_UPTODATE and BC_DIRTY on buf.*/
+static inline void ext4_bcache_clear_dirty(struct ext4_buf *buf) {
+	buf->flags &= ~((1 << BC_UPTODATE) | (1 << BC_DIRTY));
+}
+
+/**@brief Increment reference counter of buf by 1.*/
+#define ext4_bcache_inc_ref(buf) ((buf)->refctr++)
+
+/**@brief Decrement reference counter of buf by 1.*/
+#define ext4_bcache_dec_ref(buf) ((buf)->refctr--)
+
+/**@brief Put buffer on the dirty list unless it is already there.
+ * @param bc block cache descriptor
+ * @param buf buffer descriptor */
+static inline void
+ext4_bcache_insert_dirty_node(struct ext4_bcache *bc, struct ext4_buf *buf) {
+	if (buf->on_dirty_list)
+		return;
+	SLIST_INSERT_HEAD(&bc->dirty_list, buf, dirty_node);
+	buf->on_dirty_list = true;
+}
+
+/**@brief Take buffer off the dirty list if it is on it.
+ * @param bc block cache descriptor
+ * @param buf buffer descriptor */
+static inline void
+ext4_bcache_remove_dirty_node(struct ext4_bcache *bc, struct ext4_buf *buf) {
+	if (!buf->on_dirty_list)
+		return;
+	SLIST_REMOVE(&bc->dirty_list, buf, ext4_buf, dirty_node);
+	buf->on_dirty_list = false;
+}
+
+
+/**@brief Dynamic initialization of block cache.
+ * @param bc block cache descriptor
+ * @param cnt items count in block cache
+ * @param itemsize single item size (in bytes)
+ * @return standard error code*/
+int ext4_bcache_init_dynamic(struct ext4_bcache *bc, u32int cnt,
+ u32int itemsize);
+
+/**@brief Do cleanup works on block cache.
+ * @param bc block cache descriptor.*/
+void ext4_bcache_cleanup(struct ext4_bcache *bc);
+
+/**@brief Dynamic de-initialization of block cache.
+ * @param bc block cache descriptor
+ * @return standard error code*/
+int ext4_bcache_fini_dynamic(struct ext4_bcache *bc);
+
+/**@brief Get a buffer with the lowest LRU counter in bcache.
+ * @param bc block cache descriptor
+ * @return buffer with the lowest LRU counter*/
+struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc);
+
+/**@brief Drop unreferenced buffer from bcache.
+ * @param bc block cache descriptor
+ * @param buf buffer*/
+void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf);
+
+/**@brief Invalidate a buffer.
+ * @param bc block cache descriptor
+ * @param buf buffer*/
+void ext4_bcache_invalidate_buf(struct ext4_bcache *bc,
+ struct ext4_buf *buf);
+
+/**@brief Invalidate a range of buffers.
+ * @param bc block cache descriptor
+ * @param from starting lba
+ * @param cnt number of blocks to invalidate,
+ * starting at from*/
+void ext4_bcache_invalidate_lba(struct ext4_bcache *bc,
+ u64int from,
+ u32int cnt);
+
+/**@brief Find existing buffer from block cache memory.
+ * Unreferenced block allocation is based on LRU
+ * (Least Recently Used) algorithm.
+ * @param bc block cache descriptor
+ * @param b block to alloc
+ * @param lba logical block address
+ * @return block cache buffer */
+struct ext4_buf *
+ext4_bcache_find_get(struct ext4_bcache *bc, struct ext4_block *b,
+ u64int lba);
+
+/**@brief Allocate block from block cache memory.
+ * Unreferenced block allocation is based on LRU
+ * (Least Recently Used) algorithm.
+ * @param bc block cache descriptor
+ * @param b block to alloc
+ * @param is_new block is new (needs to be read)
+ * @return standard error code*/
+int ext4_bcache_alloc(struct ext4_bcache *bc, struct ext4_block *b,
+ bool *is_new);
+
+/**@brief Free block from cache memory (decrement reference counter).
+ * @param bc block cache descriptor
+ * @param b block to free
+ * @return standard error code*/
+int ext4_bcache_free(struct ext4_bcache *bc, struct ext4_block *b);
+
+/**@brief Return a full status of block cache.
+ * @param bc block cache descriptor
+ * @return full status*/
+bool ext4_bcache_is_full(struct ext4_bcache *bc);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_bitmap.h
@@ -1,0 +1,49 @@
+#pragma once
+
+#include "ext4_config.h"
+
+/**@brief Set one bit in a bitmap (bit 0 is the lowest bit of byte 0).
+ * @param bmap bitmap buffer
+ * @param bit index of the bit to set*/
+static inline void ext4_bmap_bit_set(u8int *bmap, u32int bit)
+{
+	bmap[bit >> 3] |= 1 << (bit & 7);
+}
+
+/**@brief Clear one bit in a bitmap (bit 0 is the lowest bit of byte 0).
+ * @param bmap bitmap buffer
+ * @param bit index of the bit to clear*/
+static inline void ext4_bmap_bit_clr(u8int *bmap, u32int bit)
+{
+	bmap[bit >> 3] &= ~(1 << (bit & 7));
+}
+
+/**@brief Check if the bitmap bit is set; the result is exactly true or false.
+ * @param bmap bitmap buffer
+ * @param bit bit to check*/
+static inline bool ext4_bmap_is_bit_set(u8int *bmap, u32int bit)
+{
+	return (bmap[bit >> 3] & (1 << (bit & 7))) != 0;
+}
+
+/**@brief Check if the bitmap bit is clear (the negation of ext4_bmap_is_bit_set).
+ * @param bmap bitmap buffer
+ * @param bit bit to check*/
+static inline bool ext4_bmap_is_bit_clr(u8int *bmap, u32int bit)
+{
+	return (bmap[bit >> 3] & (1 << (bit & 7))) == 0;
+}
+
+/**@brief Free range of bits in bitmap.
+ * @param bmap bitmap buffer
+ * @param sbit start bit
+ * @param bcnt bit count*/
+void ext4_bmap_bits_free(u8int *bmap, u32int sbit, u32int bcnt);
+
+/**@brief Find first clear bit in bitmap.
+ * @param sbit start bit of search
+ * @param ebit end bit of search
+ * @param bit_id output parameter (first free bit)
+ * @return standard error code*/
+int ext4_bmap_bit_find_clr(u8int *bmap, u32int sbit, u32int ebit,
+ u32int *bit_id, bool *no_space);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_block_group.h
@@ -1,0 +1,271 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_super.h"
+
+/**@brief Read the block address of the data block bitmap.
+ * The high 32 bits are used only when descriptors exceed the minimum size.
+ * @param bg pointer to block group
+ * @param s pointer to superblock
+ * @return Address of block with block bitmap
+ */
+static inline u64int ext4_bg_get_block_bitmap(struct ext4_bgroup *bg,
+	struct ext4_sblock *s)
+{
+	u64int lo = to_le32(bg->block_bitmap_lo);
+
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	return lo | (u64int)to_le32(bg->block_bitmap_hi) << 32;
+}
+
+/**@brief Store the block address of the data block bitmap.
+ * The high 32 bits are written only when descriptors exceed the minimum size.
+ * @param bg pointer to block group
+ * @param s pointer to superblock
+ * @param blk block address to store
+ */
+static inline void ext4_bg_set_block_bitmap(struct ext4_bgroup *bg,
+	struct ext4_sblock *s, u64int blk)
+{
+	bg->block_bitmap_lo = to_le32((u32int)blk);
+
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->block_bitmap_hi = to_le32(blk >> 32);
+}
+
+/**@brief Read the block address of the i-node bitmap.
+ * The high 32 bits are used only when descriptors exceed
+ * the minimum size.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Address of block with i-node bitmap
+ */
+static inline u64int ext4_bg_get_inode_bitmap(struct ext4_bgroup *bg,
+	struct ext4_sblock *s)
+{
+	u64int lo = to_le32(bg->inode_bitmap_lo);
+
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	return lo | (u64int)to_le32(bg->inode_bitmap_hi) << 32;
+}
+
+/**@brief Store the block address of the i-node bitmap.
+ * The high 32 bits are written only when descriptors exceed the minimum size.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param blk block address to store
+ */
+static inline void ext4_bg_set_inode_bitmap(struct ext4_bgroup *bg,
+	struct ext4_sblock *s, u64int blk)
+{
+	bg->inode_bitmap_lo = to_le32((u32int)blk);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->inode_bitmap_hi = to_le32(blk >> 32);
+}
+
+
+/**@brief Read the address of the first block of the i-node table.
+ * The high 32 bits are used only when descriptors exceed the minimum size.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Address of first block of i-node table
+ */
+static inline u64int
+ext4_bg_get_inode_table_first_block(struct ext4_bgroup *bg,
+	struct ext4_sblock *s)
+{
+	u64int lo = to_le32(bg->inode_table_first_block_lo);
+
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	return lo | (u64int)to_le32(bg->inode_table_first_block_hi) << 32;
+}
+
+/**@brief Store the address of the first i-node table block (high half only for large descriptors).
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param blk block address to store
+ */
+static inline void
+ext4_bg_set_inode_table_first_block(struct ext4_bgroup *bg,
+	struct ext4_sblock *s, u64int blk)
+{
+	bg->inode_table_first_block_lo = to_le32((u32int)blk);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->inode_table_first_block_hi = to_le32(blk >> 32);
+}
+
+/**@brief Get number of free blocks in block group.
+ * The upper 16 bits are read only when descriptors exceed the minimum size.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Number of free blocks in block group
+ */
+static inline u32int ext4_bg_get_free_blocks_count(struct ext4_bgroup *bg,
+	struct ext4_sblock *s)
+{
+	u32int lo = to_le16(bg->free_blocks_count_lo);
+
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	return lo | (u32int)to_le16(bg->free_blocks_count_hi) << 16;
+}
+
+/**@brief Set number of free blocks in block group.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Number of free blocks; bits 16..31 are stored only for large descriptors
+ */
+static inline void ext4_bg_set_free_blocks_count(struct ext4_bgroup *bg,
+	struct ext4_sblock *s, u32int cnt)
+{
+	bg->free_blocks_count_lo = to_le16(cnt & 0xffff);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->free_blocks_count_hi = to_le16(cnt >> 16);
+}
+
+/**@brief Get number of free i-nodes in block group.
+ * The upper 16 bits are read only when descriptors exceed the minimum size.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Number of free i-nodes in block group
+ */
+static inline u32int ext4_bg_get_free_inodes_count(struct ext4_bgroup *bg,
+	struct ext4_sblock *s)
+{
+	u32int lo = to_le16(bg->free_inodes_count_lo);
+
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	return lo | (u32int)to_le16(bg->free_inodes_count_hi) << 16;
+}
+
+/**@brief Set number of free i-nodes in block group.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Number of free i-nodes; bits 16..31 are stored only for large descriptors
+ */
+static inline void ext4_bg_set_free_inodes_count(struct ext4_bgroup *bg,
+	struct ext4_sblock *s, u32int cnt)
+{
+	bg->free_inodes_count_lo = to_le16(cnt & 0xffff);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->free_inodes_count_hi = to_le16(cnt >> 16);
+}
+
+/**@brief Get number of used directories in block group.
+ * The upper 16 bits are read only when descriptors exceed the minimum size.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Number of used directories in block group
+ */
+static inline u32int ext4_bg_get_used_dirs_count(struct ext4_bgroup *bg,
+	struct ext4_sblock *s)
+{
+	u32int lo = to_le16(bg->used_dirs_count_lo);
+
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	return lo | (u32int)to_le16(bg->used_dirs_count_hi) << 16;
+}
+
+/**@brief Set number of used directories in block group.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Number of used directories; bits 16..31 are stored only for large descriptors
+ */
+static inline void ext4_bg_set_used_dirs_count(struct ext4_bgroup *bg,
+	struct ext4_sblock *s, u32int cnt)
+{
+	bg->used_dirs_count_lo = to_le16(cnt & 0xffff);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->used_dirs_count_hi = to_le16(cnt >> 16);
+}
+
+/**@brief Get number of unused i-nodes.
+ * The upper 16 bits are read only when descriptors exceed
+ * the minimum size.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @return Number of unused i-nodes
+ */
+static inline u32int ext4_bg_get_itable_unused(struct ext4_bgroup *bg,
+	struct ext4_sblock *s)
+{
+	u32int lo = to_le16(bg->itable_unused_lo);
+
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return lo;
+	return lo | (u32int)to_le16(bg->itable_unused_hi) << 16;
+}
+
+/**@brief Set number of unused i-nodes.
+ * @param bg Pointer to block group
+ * @param s Pointer to superblock
+ * @param cnt Number of unused i-nodes; bits 16..31 are stored only for large descriptors
+ */
+static inline void ext4_bg_set_itable_unused(struct ext4_bgroup *bg,
+	struct ext4_sblock *s, u32int cnt)
+{
+	bg->itable_unused_lo = to_le16(cnt & 0xffff);
+	if (ext4_sb_get_desc_size(s) <= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return;
+	bg->itable_unused_hi = to_le16(cnt >> 16);
+}
+
+/**@brief Set checksum of block group.
+ * @param bg Pointer to block group
+ * @param crc Checksum of block group (stored through to_le16)
+ */
+static inline void ext4_bg_set_checksum(struct ext4_bgroup *bg, u16int crc)
+{
+ bg->checksum = to_le16(crc);
+}
+
+/**@brief Check if block group has a flag.
+ * @param bg Pointer to block group
+ * @param f Flag (bit mask) to be checked
+ * @return true if any bit of f is set in the group flags, false otherwise
+ */
+static inline bool ext4_bg_has_flag(struct ext4_bgroup *bg, u32int f)
+{
+	return (to_le16(bg->flags) & f) != 0;
+}
+
+/**@brief Set flag of block group.
+ * @param bg Pointer to block group
+ * @param f Flag (bit mask) to be set; only the low 16 bits are kept
+ */
+static inline void ext4_bg_set_flag(struct ext4_bgroup *bg, u32int f)
+{
+	u16int cur = to_le16(bg->flags);
+
+	bg->flags = to_le16((u16int)(cur | f));
+}
+
+/**@brief Clear flag of block group.
+ * @param bg Pointer to block group
+ * @param f Flag (bit mask) to be cleared
+ */
+static inline void ext4_bg_clear_flag(struct ext4_bgroup *bg, u32int f)
+{
+	u16int cur = to_le16(bg->flags);
+
+	bg->flags = to_le16((u16int)(cur & ~f));
+}
+
+/**@brief Calculate CRC16 of the block group.
+ * @param crc Init value
+ * @param buffer Input buffer
+ * @param len Sizeof input buffer
+ * @return Computed CRC16*/
+u16int ext4_bg_crc16(u16int crc, const u8int *buffer, usize len);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_blockdev.h
@@ -1,0 +1,215 @@
+#pragma once
+
+#include "ext4_bcache.h"
+
+struct ext4_blockdev_iface {
+	/**@brief Open device function
+	 * @param bdev block device.*/
+	int (*open)(struct ext4_blockdev *bdev);
+
+	/**@brief Block read function.
+	 * @param bdev block device
+	 * @param buf output buffer
+	 * @param blk_id block id
+	 * @param blk_cnt block count*/
+	int (*bread)(struct ext4_blockdev *bdev, void *buf, u64int blk_id,
+		u32int blk_cnt);
+
+	/**@brief Block write function (bdev: block device, as for bread).
+	 * @param buf input buffer
+	 * @param blk_id block id
+	 * @param blk_cnt block count*/
+	int (*bwrite)(struct ext4_blockdev *bdev, const void *buf,
+		u64int blk_id, u32int blk_cnt);
+
+	/**@brief Close device function.
+	 * @param bdev block device.*/
+	int (*close)(struct ext4_blockdev *bdev);
+
+	/**@brief Lock block device. Required in multi partition mode
+	 * operations. Optional field.
+	 * @param bdev block device.*/
+	int (*lock)(struct ext4_blockdev *bdev);
+
+	/**@brief Unlock block device. Required in multi partition mode
+	 * operations. Optional field.
+	 * @param bdev block device.*/
+	int (*unlock)(struct ext4_blockdev *bdev);
+
+	/**@brief Physical block size (bytes)*/
+	u32int ph_bsize;
+
+	/**@brief Physical block count*/
+	u64int ph_bcnt;
+
+	/**@brief Buffer of one physical block (EXT4_BLOCKDEV_STATIC_INSTANCE sizes it to the block size)*/
+	u8int *ph_bbuf;
+
+	/**@brief Reference counter to block device interface*/
+	u32int ph_refctr;
+
+	/**@brief Physical read counter*/
+	u32int bread_ctr;
+
+	/**@brief Physical write counter*/
+	u32int bwrite_ctr;
+
+	/**@brief User data pointer*/
+	void* p_user;
+};
+
+/**@brief Definition of the simple block device.*/
+struct ext4_blockdev {
+	/**@brief Block device interface*/
+	struct ext4_blockdev_iface *bdif;
+
+	/**@brief Offset in bdif. For multi partition mode.*/
+	u64int part_offset;
+
+	/**@brief Partition size in bdif (the static-instance macro sets it to block count times block size). For multi partition mode.*/
+	u64int part_size;
+
+	/**@brief Block cache.*/
+	struct ext4_bcache *bc;
+
+	/**@brief Logical block size (bytes)*/
+	u32int lg_bsize;
+
+	/**@brief Logical block count*/
+	u64int lg_bcnt;
+
+	/**@brief Cache write back mode reference counter*/
+	u32int cache_write_back;
+
+	/**@brief The filesystem this block device belongs to. */
+	struct ext4_fs *fs;
+
+	void *journal;	/* NOTE(review): opaque pointer, presumably the journal attached to this device; confirm */
+};
+
+#pragma incomplete struct ext4_blockdev
+
+/**@brief Static initialization of the block device (part_size is computed in 64 bits).*/
+#define EXT4_BLOCKDEV_STATIC_INSTANCE(__name, __bsize, __bcnt, __open, __bread,\
+		__bwrite, __close, __lock, __unlock) \
+	static u8int __name##_ph_bbuf[(__bsize)]; \
+	static struct ext4_blockdev_iface __name##_iface = { \
+		.open = __open, \
+		.bread = __bread, \
+		.bwrite = __bwrite, \
+		.close = __close, \
+		.lock = __lock, \
+		.unlock = __unlock, \
+		.ph_bsize = (__bsize), \
+		.ph_bcnt = (__bcnt), \
+		.ph_bbuf = __name##_ph_bbuf, \
+	}; \
+	static struct ext4_blockdev __name = { \
+		.bdif = &__name##_iface, \
+		.part_offset = 0, \
+		.part_size = (u64int)(__bcnt) * (__bsize), \
+	}
+
+/**@brief Block device initialization.
+ * @param bdev block device descriptor
+ * @return standard error code*/
+int ext4_block_init(struct ext4_blockdev *bdev);
+
+/**@brief Bind a block cache to the block device.
+ * @param bdev block device descriptor
+ * @param bc block cache descriptor
+ * @return standard error code*/
+int ext4_block_bind_bcache(struct ext4_blockdev *bdev, struct ext4_bcache *bc);
+
+/**@brief Close the block device.
+ * @param bdev block device descriptor
+ * @return standard error code*/
+int ext4_block_fini(struct ext4_blockdev *bdev);
+
+/**@brief Flush data in the given buffer to disk.
+ * @param bdev block device descriptor
+ * @param buf buffer to flush
+ * @return standard error code*/
+int ext4_block_flush_buf(struct ext4_blockdev *bdev, struct ext4_buf *buf);
+
+/**@brief Flush data in the buffer of the given lba to disk,
+ * if that buffer exists in the block cache.
+ * @param bdev block device descriptor
+ * @param lba logical block address
+ * @return standard error code*/
+int ext4_block_flush_lba(struct ext4_blockdev *bdev, u64int lba);
+
+/**@brief Set logical block size in block device.
+ * @param bdev block device descriptor
+ * @param lb_bsize logical block size (in bytes)
+ * @note the function is void: no error code is returned*/
+void ext4_block_set_lb_size(struct ext4_blockdev *bdev, u32int lb_bsize);
+
+/**@brief Block get function (through cache, without reading the block).
+ * @param bdev block device descriptor
+ * @param b block descriptor
+ * @param lba logical block address
+ * @return standard error code*/
+int ext4_block_get_noread(struct ext4_blockdev *bdev, struct ext4_block *b,
+		u64int lba);
+
+/**@brief Block get function (through cache).
+ * @param bdev block device descriptor
+ * @param b block descriptor
+ * @param lba logical block address
+ * @return standard error code*/
+int ext4_block_get(struct ext4_blockdev *bdev, struct ext4_block *b,
+		u64int lba);
+
+/**@brief Block set procedure (through cache).
+ * @param bdev block device descriptor
+ * @param b block descriptor
+ * @return standard error code*/
+int ext4_block_set(struct ext4_blockdev *bdev, struct ext4_block *b);
+
+/**@brief Block read procedure (without cache)
+ * @param bdev block device descriptor
+ * @param buf output buffer
+ * @param lba logical block address (cnt: block count, presumably in logical blocks)
+ * @return standard error code*/
+int ext4_blocks_get_direct(struct ext4_blockdev *bdev, void *buf, u64int lba,
+		u32int cnt);
+
+/**@brief Block write procedure (without cache)
+ * @param bdev block device descriptor
+ * @param buf input buffer
+ * @param lba logical block address (cnt: block count, presumably in logical blocks)
+ * @return standard error code*/
+int ext4_blocks_set_direct(struct ext4_blockdev *bdev, const void *buf,
+		u64int lba, u32int cnt);
+
+/**@brief Write to block device (by direct address).
+ * @param bdev block device descriptor
+ * @param off byte offset in block device
+ * @param buf input buffer
+ * @param len length of the write buffer
+ * @return standard error code*/
+int ext4_block_writebytes(struct ext4_blockdev *bdev, u64int off,
+		const void *buf, u32int len);
+
+/**@brief Read from block device (by direct address).
+ * @param bdev block device descriptor
+ * @param off byte offset in block device
+ * @param buf output buffer
+ * @param len length of the read buffer
+ * @return standard error code*/
+int ext4_block_readbytes(struct ext4_blockdev *bdev, u64int off, void *buf,
+		u32int len);
+
+/**@brief Flush all dirty buffers to disk
+ * @param bdev block device descriptor
+ * @return standard error code*/
+int ext4_block_cache_flush(struct ext4_blockdev *bdev);
+
+/**@brief Enable/disable write back cache mode
+ * @param bdev block device descriptor
+ * @param on_off mode switch:
+ * !0 - ENABLE
+ * 0 - DISABLE (all delayed cache buffers will be flushed)
+ * @return standard error code*/
+int ext4_block_cache_write_back(struct ext4_blockdev *bdev, u8int on_off);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_config.h
@@ -1,0 +1,40 @@
+#pragma once
+
+#include <u.h>
+#include <libc.h>
+
+typedef enum { false, true } bool;	/* boolean type for this code: false == 0, true == 1 */
+
+enum {	/* open-mode flags, written in octal */
+	O_RDONLY = 00,
+	O_WRONLY = 01,
+	O_RDWR = 02,
+	O_CREAT = 0100,
+	O_EXCL = 0200,
+	O_TRUNC = 01000,
+	O_APPEND = 02000,	/* NOTE(review): values look like the Linux <fcntl.h> ones; confirm that is intended */
+};
+
+#if defined(__mips__) || defined(__power__) || defined(__power64__) || defined(__sparc__) || defined(__sparc64__)
+#define CONFIG_BIG_ENDIAN	/* NOTE(review): assumes every target listed above is big-endian; confirm */
+#endif
+
+#define CONFIG_EXT4_MAX_BLOCKDEV_NAME 128
+#define CONFIG_EXT4_MAX_MP_NAME 128
+#define CONFIG_EXT4_BLOCKDEVS_COUNT 32
+#define CONFIG_EXT4_MOUNTPOINTS_COUNT 32
+#define CONFIG_BLOCK_DEV_CACHE_SIZE 1024
+
+/* Maximum size of a single truncate. Transactions must be limited to reduce
+ * the number of allocations made for a single transaction.
+ */
+#define CONFIG_MAX_TRUNCATE_SIZE (16ul * 1024ul * 1024ul)	/* 16 MiB */
+
+extern char Eexists[];	/* error strings; only declared here, defined elsewhere */
+extern char Einval[];
+extern char Eio[];
+extern char Enomem[];
+extern char Enospc[];
+extern char Enotfound[];
+extern char Eperm[];
+extern char Erdonlyfs[];
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_crc32.h
@@ -1,0 +1,18 @@
+/* Based on FreeBSD. */
+#pragma once
+
+#include "ext4_config.h"
+
+/**@brief CRC32 algorithm.
+ * @param crc input feed
+ * @param buf input buffer
+ * @param size input buffer length (bytes)
+ * @return updated crc32 value*/
+u32int ext4_crc32(u32int crc, const void *buf, u32int size);
+
+/**@brief CRC32C algorithm.
+ * @param crc input feed
+ * @param buf input buffer
+ * @param size input buffer length (bytes)
+ * @return updated crc32c value*/
+u32int ext4_crc32c(u32int crc, const void *buf, u32int size);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_debug.h
@@ -1,0 +1,93 @@
+#pragma once
+
+#include "ext4_config.h"
+
+#define DEBUG_BALLOC (1ul << 0)	/* one mask bit per module, see ext4_dmask_id2str() */
+#define DEBUG_BCACHE (1ul << 1)
+#define DEBUG_BITMAP (1ul << 2)
+#define DEBUG_BLOCK_GROUP (1ul << 3)
+#define DEBUG_BLOCKDEV (1ul << 4)
+#define DEBUG_DIR_IDX (1ul << 5)
+#define DEBUG_DIR (1ul << 6)
+#define DEBUG_EXTENT (1ul << 7)
+#define DEBUG_FS (1ul << 8)
+#define DEBUG_HASH (1ul << 9)
+#define DEBUG_IALLOC (1ul << 10)
+#define DEBUG_INODE (1ul << 11)
+#define DEBUG_SUPER (1ul << 12)
+#define DEBUG_XATTR (1ul << 13)
+#define DEBUG_MKFS (1ul << 14)
+#define DEBUG_EXT4 (1ul << 15)
+#define DEBUG_JBD (1ul << 16)
+#define DEBUG_MBR (1ul << 17)
+
+#define DEBUG_NOPREFIX (1ul << 31)	/* when set in the ext4_dbg() mask, the prefix is not printed */
+#define DEBUG_ALL (0xFFFFFFFF)	/* every mask bit set */
+
+/**@brief Translate one debug module bit into its log-line prefix.
+ * @param m A single DEBUG_* module bit (not a combination of bits)
+ * @return Static prefix string for that module, or "" when m matches
+ *         none of the cases below (for example when several bits
+ *         are set at once)
+ */
+static inline const char *ext4_dmask_id2str(u32int m)
+{
+	switch(m) {
+	/* bits 0-4 */
+	case DEBUG_BALLOC:	return "ext4_balloc: ";
+	case DEBUG_BCACHE:	return "ext4_bcache: ";
+	case DEBUG_BITMAP:	return "ext4_bitmap: ";
+	case DEBUG_BLOCK_GROUP:	return "ext4_block_group: ";
+	case DEBUG_BLOCKDEV:	return "ext4_blockdev: ";
+
+	/* bits 5-7 */
+	case DEBUG_DIR_IDX:	return "ext4_dir_idx: ";
+	case DEBUG_DIR:		return "ext4_dir: ";
+	case DEBUG_EXTENT:	return "ext4_extent: ";
+
+	/* bits 8-13; DEBUG_XATTR used to fall through to the empty prefix */
+	case DEBUG_FS:		return "ext4_fs: ";
+	case DEBUG_HASH:	return "ext4_hash: ";
+	case DEBUG_IALLOC:	return "ext4_ialloc: ";
+	case DEBUG_INODE:	return "ext4_inode: ";
+	case DEBUG_SUPER:	return "ext4_super: ";
+	case DEBUG_XATTR:	return "ext4_xattr: ";
+
+	/* bits 14-17 */
+	case DEBUG_MKFS:	return "ext4_mkfs: ";
+	case DEBUG_EXT4:	return "ext4: ";
+	case DEBUG_JBD:		return "ext4_jbd: ";
+	case DEBUG_MBR:		return "ext4_mbr: ";
+
+	default:
+		break;
+	}
+	return "";
+}
+#define DBG_NONE ""	/* severity tags; presumably pasted in front of ext4_dbg() format strings (confirm) */
+#define DBG_INFO "[info] "
+#define DBG_WARN "[warn] "
+#define DBG_ERROR "[error] "
+
+/**@brief Global debug mask set.
+ * @param m debug mask bits (presumably or-ed into the global mask; confirm).*/
+void ext4_dmask_set(u32int m);
+
+/**@brief Global debug mask clear.
+ * @param m debug mask bits (presumably removed from the global mask; confirm).*/
+void ext4_dmask_clr(u32int m);
+
+/**@brief Global debug mask get.
+ * @return debug mask*/
+u32int ext4_dmask_get(void);
+
+/**@brief Debug printf: writes to fd 2 when (m) shares a bit with ext4_dmask_get(); the "function: module: " prefix is skipped if DEBUG_NOPREFIX is set in (m).*/
+#define ext4_dbg(m, ...) \
+	do { \
+		if ((m) & ext4_dmask_get()) { \
+			if (!((m) & DEBUG_NOPREFIX)) { \
+				fprint(2, "%s: %s", __func__, ext4_dmask_id2str(m)); \
+			} \
+			fprint(2, __VA_ARGS__); \
+		} \
+	} while (0)
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_dir.h
@@ -1,0 +1,243 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_blockdev.h"
+#include "ext4_super.h"
+
+struct ext4_dir_iter {
+	struct ext4_inode_ref *inode_ref;	/* directory i-node (see ext4_dir_iterator_init) */
+	struct ext4_block curr_blk;	/* presumably the block holding the current entry; confirm */
+	u64int curr_off;	/* presumably the current position in the directory; confirm */
+	struct ext4_dir_en *curr;	/* presumably the current entry; confirm */
+};
+
+struct ext4_dir_search_result {
+	struct ext4_block block;	/* presumably the block that contains dentry; confirm */
+	struct ext4_dir_en *dentry;	/* entry found by ext4_dir_find_entry() */
+};
+
+
+/**@brief Read the i-node number stored in a directory entry.
+ * @param de Directory entry
+ * @return I-node number
+ */
+static inline u32int ext4_dir_en_get_inode(struct ext4_dir_en *de)
+{
+	u32int ino = to_le32(de->inode);
+	return ino;
+}
+
+/**@brief Store an i-node number in a directory entry.
+ * @param de Directory entry
+ * @param inode I-node number
+ */
+static inline void ext4_dir_en_set_inode(struct ext4_dir_en *de, u32int inode)
+{
+	u32int raw = to_le32(inode);
+	de->inode = raw;
+}
+
+/**@brief Store an i-node number in a dot entry of an HTree root.
+ * @param de Directory index dot entry
+ * @param inode I-node number
+ */
+static inline void ext4_dx_dot_en_set_inode(struct ext4_dir_idx_dot_en *de, u32int inode)
+{
+	u32int raw = to_le32(inode);
+	de->inode = raw;
+}
+
+/**@brief Read the length of a directory entry.
+ * @param de Directory entry
+ * @return Entry length */
+static inline u16int ext4_dir_en_get_entry_len(struct ext4_dir_en *de)
+{
+	u16int len = to_le16(de->entry_len);
+	return len;
+}
+
+/**@brief Store the length of a directory entry.
+ * @param de Directory entry
+ * @param l Entry length */
+static inline void ext4_dir_en_set_entry_len(struct ext4_dir_en *de, u16int l)
+{
+	u16int raw = to_le16(l);
+	de->entry_len = raw;
+}
+
+/**@brief Read the name length of a directory entry.
+ * @param sb Superblock (its revision decides how the length is encoded)
+ * @param de Directory entry
+ * @return Name length; name_length_high supplies bits 8-15 only when
+ *         rev_level is 0 and minor_rev_level is below 5
+ */
+static inline u16int ext4_dir_en_get_name_len(struct ext4_sblock *sb,
+		struct ext4_dir_en *de)
+{
+	u16int len = de->name_len;
+	u16int hi = de->in.name_length_high;
+
+	if (ext4_get32(sb, rev_level) != 0 || ext4_get32(sb, minor_rev_level) >= 5)
+		return len;
+	return len | (hi << 8);
+}
+
+/**@brief Set directory entry name length.
+ * @param sb Superblock
+ * @param de Directory entry
+ * @param len Entry name length (high byte stored only when rev_level is 0 and minor_rev_level < 5)
+ */
+static inline void ext4_dir_en_set_name_len(struct ext4_sblock *sb,
+		struct ext4_dir_en *de,
+		u16int len)
+{
+	de->name_len = len & 0xFF;	/* explicit low byte: (len << 8) >> 8 is a no-op after integer promotion */
+
+	if ((ext4_get32(sb, rev_level) == 0) &&
+	    (ext4_get32(sb, minor_rev_level) < 5))
+		de->in.name_length_high = len >> 8;
+}
+
+/**@brief Read the i-node type recorded in a directory entry.
+ * @param sb Superblock (its revision decides whether a type is stored)
+ * @param de Directory entry
+ * @return Stored type when rev_level > 0 or minor_rev_level >= 5,
+ *         EXT4_DE_UNKNOWN otherwise
+ */
+static inline u8int ext4_dir_en_get_inode_type(struct ext4_sblock *sb,
+		struct ext4_dir_en *de)
+{
+	if (ext4_get32(sb, rev_level) == 0 && ext4_get32(sb, minor_rev_level) < 5)
+		return EXT4_DE_UNKNOWN;
+
+	return de->in.inode_type;
+}
+/**@brief Record the i-node type in a directory entry.
+ * @param sb Superblock (its revision decides whether a type is stored)
+ * @param de Directory entry
+ * @param t I-node type (file, dir, etc.); ignored when rev_level is 0
+ *          and minor_rev_level is below 5
+ */
+static inline void ext4_dir_en_set_inode_type(struct ext4_sblock *sb,
+		struct ext4_dir_en *de, u8int t)
+{
+	if (ext4_get32(sb, rev_level) == 0 && ext4_get32(sb, minor_rev_level) < 5)
+		return;
+	de->in.inode_type = t;
+}
+
+/**@brief Verify checksum of a linear directory leaf block
+ * @param inode_ref Directory i-node
+ * @param dirent Linear directory leaf block
+ * @return true if the block passed checksum verification
+ */
+bool ext4_dir_csum_verify(struct ext4_inode_ref *inode_ref,
+		struct ext4_dir_en *dirent);
+
+/**@brief Initialize directory iterator.
+ * Set position to the first valid entry from the required position.
+ * @param it Pointer to iterator to be initialized
+ * @param inode_ref Directory i-node
+ * @param pos Position to start reading entries from
+ * @return Error code
+ */
+int ext4_dir_iterator_init(struct ext4_dir_iter *it,
+		struct ext4_inode_ref *inode_ref, u64int pos);
+
+/**@brief Jump to the next valid entry
+ * @param it Initialized iterator
+ * @return Error code
+ */
+int ext4_dir_iterator_next(struct ext4_dir_iter *it);
+
+/**@brief Uninitialize directory iterator.
+ * Release all allocated structures.
+ * @param it Iterator to be finished
+ * @return Error code
+ */
+int ext4_dir_iterator_fini(struct ext4_dir_iter *it);
+
+/**@brief Write directory entry to concrete data block.
+ * @param sb Superblock
+ * @param en Pointer to entry to be written
+ * @param entry_len Length of new entry
+ * @param child Child i-node to be written to new entry
+ * @param name Name of the new entry
+ * @param name_len Length of entry name
+ */
+void ext4_dir_write_entry(struct ext4_sblock *sb, struct ext4_dir_en *en,
+		u16int entry_len, struct ext4_inode_ref *child,
+		const char *name, usize name_len);
+
+/**@brief Add new entry to the directory.
+ * @param parent Directory i-node
+ * @param name Name of new entry
+ * @param name_len Length of the name
+ * @param child I-node to be referenced from new entry
+ * @return Error code */
+int ext4_dir_add_entry(struct ext4_inode_ref *parent, const char *name,
+		u32int name_len, struct ext4_inode_ref *child);
+
+/**@brief Find directory entry with the given name.
+ * @param result Result structure to be filled in if the entry is found
+ * @param parent Directory i-node
+ * @param name Name of entry to be found
+ * @param name_len Name length
+ * @return Error code
+ */
+int ext4_dir_find_entry(struct ext4_dir_search_result *result,
+		struct ext4_inode_ref *parent, const char *name,
+		u32int name_len);
+
+/**@brief Remove directory entry.
+ * @param parent Directory i-node
+ * @param name Name of the entry to be removed
+ * @param name_len Name length
+ * @return Error code
+ */
+int ext4_dir_remove_entry(struct ext4_inode_ref *parent, const char *name,
+		u32int name_len);
+
+/**@brief Try to insert entry into a concrete data block.
+ * @param sb Superblock
+ * @param inode_ref Directory i-node
+ * @param dst_blk Block to try to insert entry to
+ * @param child Child i-node to be inserted by new entry
+ * @param name Name of the new entry
+ * @param name_len Length of the new entry name
+ * @return Error code
+ */
+int ext4_dir_try_insert_entry(struct ext4_sblock *sb,
+		struct ext4_inode_ref *inode_ref,
+		struct ext4_block *dst_blk,
+		struct ext4_inode_ref *child, const char *name,
+		u32int name_len);
+
+/**@brief Try to find entry in block by name.
+ * @param block Block containing entries
+ * @param sb Superblock
+ * @param name_len Length of entry name
+ * @param name Name of entry to be found
+ * @param res_entry Output pointer to found entry, nil if not found
+ * @return Error code
+ */
+int ext4_dir_find_in_block(struct ext4_block *block, struct ext4_sblock *sb,
+		usize name_len, const char *name,
+		struct ext4_dir_en **res_entry);
+
+/**@brief Release the data held by a directory search result.
+ * @param parent Parent inode
+ * @param result Search result to destroy
+ * @return Error code
+ */
+int ext4_dir_destroy_result(struct ext4_inode_ref *parent,
+		struct ext4_dir_search_result *result);
+
+/**@brief NOTE(review): presumably computes and stores the checksum that ext4_dir_csum_verify() checks; confirm.*/
+void ext4_dir_set_csum(struct ext4_inode_ref *inode_ref,
+		struct ext4_dir_en *dirent);
+
+/**@brief Initialize a directory entry tail (field details are not visible in this header).*/
+void ext4_dir_init_entry_tail(struct ext4_dir_entry_tail *t);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_dir_idx.h
@@ -1,0 +1,52 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_fs.h"
+#include "ext4_dir.h"
+
+struct ext4_dir_idx_block {
+	struct ext4_block b;	/* presumably the block holding the index node; confirm */
+	struct ext4_dir_idx_entry *entries;	/* presumably the first index entry in b; confirm */
+	struct ext4_dir_idx_entry *position;	/* presumably the entry currently selected; confirm */
+};
+
+#define EXT4_DIR_DX_INIT_BCNT 2	/* NOTE(review): name suggests the initial block count of an indexed directory; confirm */
+
+
+/**@brief Initialize index structure of new directory.
+ * @param dir Pointer to directory i-node
+ * @param parent Pointer to parent directory i-node
+ * @return Error code
+ */
+int ext4_dir_dx_init(struct ext4_inode_ref *dir,
+		struct ext4_inode_ref *parent);
+
+/**@brief Try to find directory entry using directory index.
+ * @param result Output value - if the entry is found,
+ *               then it is passed back through this parameter
+ * @param inode_ref Directory i-node
+ * @param name_len Length of name to be found
+ * @param name Name to be found
+ * @return Error code
+ */
+int ext4_dir_dx_find_entry(struct ext4_dir_search_result *result,
+		struct ext4_inode_ref *inode_ref, usize name_len,
+		const char *name);
+
+/**@brief Add new entry to indexed directory
+ * @param parent Directory i-node
+ * @param child I-node to be referenced from directory entry
+ * @param name Name of new directory entry
+ * @param name_len Length of the name
+ * @return Error code */
+int ext4_dir_dx_add_entry(struct ext4_inode_ref *parent,
+		struct ext4_inode_ref *child, const char *name, u32int name_len);
+
+/**@brief Reset the parent i-node recorded in an indexed directory (presumably its ".." entry; confirm).
+ * @param dir Directory i-node
+ * @param parent_inode parent inode index
+ * @return Error code
+ */
+int ext4_dir_dx_reset_parent_inode(struct ext4_inode_ref *dir,
+		u32int parent_inode);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_extent.h
@@ -1,0 +1,312 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+#include "ext4_inode.h"
+
+/*
+ * Array of ext4_extent_path contains path to some extent.
+ * Creation/lookup routines use it for traversal/splitting/etc.
+ * Truncate uses it to simulate recursive walking.
+ */
+struct ext4_extent_path {
+	struct ext4_block block;	/* presumably the block holding this tree node; confirm */
+	u16int depth;	/* presumably the depth of this node in the tree; confirm */
+	struct ext4_extent_header *header;	/* header of the node */
+	struct ext4_extent_index *index;	/* presumably the index entry chosen in this node; confirm */
+	struct ext4_extent *extent;	/* presumably the extent chosen in this node; confirm */
+};
+
+#define EXT4_EXT_UNWRITTEN_MASK (1L << 15)	/* bit 15 of the raw length */
+/* Length limits: a raw length above MAX_LEN_WRITTEN is treated as unwritten. */
+#define EXT4_EXT_MAX_LEN_WRITTEN (1L << 15)
+#define EXT4_EXT_MAX_LEN_UNWRITTEN \
+	(EXT4_EXT_MAX_LEN_WRITTEN - 1)
+/* Raw length accessors; nblocks is converted with to_le16(). */
+#define EXT4_EXT_GET_LEN(ex) to_le16((ex)->nblocks)
+#define EXT4_EXT_GET_LEN_UNWRITTEN(ex) \
+	(EXT4_EXT_GET_LEN(ex) & ~(EXT4_EXT_UNWRITTEN_MASK))
+#define EXT4_EXT_SET_LEN(ex, count) \
+	((ex)->nblocks = to_le16(count))
+/* Unwritten state: tested by comparing the raw length, set and cleared through the mask bit. */
+#define EXT4_EXT_IS_UNWRITTEN(ex) \
+	(EXT4_EXT_GET_LEN(ex) > EXT4_EXT_MAX_LEN_WRITTEN)
+#define EXT4_EXT_SET_UNWRITTEN(ex) \
+	((ex)->nblocks |= to_le16(EXT4_EXT_UNWRITTEN_MASK))
+#define EXT4_EXT_SET_WRITTEN(ex) \
+	((ex)->nblocks &= ~(to_le16(EXT4_EXT_UNWRITTEN_MASK)))
+
+#define EXT4_EXTENT_FIRST(header) \
+	((struct ext4_extent *)(((char *)(header)) + \
+		sizeof(struct ext4_extent_header)))
+
+#define EXT4_EXTENT_FIRST_INDEX(header) \
+	((struct ext4_extent_index *)(((char *)(header)) + \
+		sizeof(struct ext4_extent_header)))
+/* LAST macros: nentries is read through to_le16(), like ext4_extent_header_get_nentries(). */
+#define EXT4_EXTENT_LAST(header) \
+	(((struct ext4_extent *)(((char *)(header)) + \
+		sizeof(struct ext4_extent_header))) + \
+		to_le16((header)->nentries) - 1)
+
+#define EXT4_EXTENT_LAST_INDEX(header) \
+	(((struct ext4_extent_index *)(((char *)(header)) + \
+		sizeof(struct ext4_extent_header))) + \
+		to_le16((header)->nentries) - 1)
+
+#define EXT4_EXTENT_SIZE sizeof(struct ext4_extent)
+#define EXT4_EXTENT_INDEX_SIZE sizeof(struct ext4_extent_index)
+/* Byte offset just past max_nentries extent-sized slots that follow the header. */
+#define EXT4_EXTENT_TAIL_OFFSET(hdr) \
+	(sizeof(struct ext4_extent_header) + \
+	(sizeof(struct ext4_extent) * to_le16((hdr)->max_nentries)))
+/* True when iblock lies in [eiblock, eiblock + len - 1]. */
+#define EXT4_EXTENT_IN_RANGE(iblock, eiblock, len) \
+	((iblock) >= (eiblock) && (iblock) <= (eiblock) + (len) - 1)
+/* All-ones u32int value. */
+#define EXT4_EXTENT_MAX_BLOCKS ((u32int)(-1))
+
+/**@brief Get logical number of the first block covered by extent.
+ * @param extent Extent to load number from
+ * @return Logical number of the first block covered by extent */
+static inline u32int ext4_extent_get_iblock(struct ext4_extent *extent)
+{
+	return to_le32(extent->iblock);
+}
+
+/**@brief Store the first logical block number covered by an extent.
+ * @param extent Extent to update
+ * @param iblock Logical number of the first block covered by the extent */
+static inline void ext4_extent_set_iblock(struct ext4_extent *extent, ext4_lblk_t iblock)
+{
+	u32int raw = to_le32(iblock);
+	extent->iblock = raw;
+}
+
+/**@brief Read the number of blocks covered by an extent.
+ * @param extent Extent to read the count from
+ * @return Block count, with the unwritten mask removed for unwritten extents */
+static inline u16int ext4_extent_get_nblocks(struct ext4_extent *extent)
+{
+	u16int len = EXT4_EXT_GET_LEN(extent);
+	if (EXT4_EXT_IS_UNWRITTEN(extent))
+		len = EXT4_EXT_GET_LEN_UNWRITTEN(extent);
+	return len;
+}
+/**@brief Store the number of blocks covered by an extent.
+ * @param extent Extent to update
+ * @param count Number of blocks covered by the extent
+ * @param unwritten When true, the unwritten mask is applied after the length is stored */
+static inline void
+ext4_extent_set_nblocks(struct ext4_extent *extent, u16int count, bool unwritten)
+{
+	EXT4_EXT_SET_LEN(extent, count);
+	if (!unwritten)
+		return;
+	EXT4_EXT_SET_UNWRITTEN(extent);
+}
+
+/**@brief Read the physical number of the first block covered by an extent.
+ * @param extent Extent to read
+ * @return fblock_hi in bits 32 and up, combined with fblock_lo in the low 32 bits */
+static inline u64int ext4_extent_get_fblock(struct ext4_extent *extent)
+{
+	u64int lo = to_le32(extent->fblock_lo);
+	return ((u64int)to_le16(extent->fblock_hi) << 32) | lo;
+}
+
+
+/**@brief Store the physical number of the first block covered by an extent.
+ * @param extent Extent to update
+ * @param fblock Physical block number; low 32 bits go to fblock_lo, the next 16 to fblock_hi */
+static inline void ext4_extent_set_fblock(struct ext4_extent *extent, u64int fblock)
+{
+	u16int hi = (u16int)(fblock >> 32);
+	extent->fblock_lo = to_le32(fblock & 0xFFFFFFFF);
+	extent->fblock_hi = to_le16(hi);
+}
+
+
+/**@brief Read the first logical block number covered by an extent index.
+ * @param index Extent index to read
+ * @return Logical number of the first block covered by the extent index */
+static inline u32int ext4_extent_index_get_iblock(struct ext4_extent_index *index)
+{
+	u32int lblk = to_le32(index->iblock);
+	return lblk;
+}
+
+/**@brief Store the first logical block number covered by an extent index.
+ * @param index Extent index to update
+ * @param iblock Logical number of the first block covered by the extent index */
+static inline void
+ext4_extent_index_set_iblock(struct ext4_extent_index *index, u32int iblock)
+{
+	u32int raw = to_le32(iblock);
+	index->iblock = raw;
+}
+
+/**@brief Read the physical number of the block where the child node is located.
+ * @param index Extent index to read
+ * @return fblock_hi in bits 32 and up, combined with fblock_lo in the low 32 bits */
+static inline u64int ext4_extent_index_get_fblock(struct ext4_extent_index *index)
+{
+	u64int hi = to_le16(index->fblock_hi);
+	u64int lo = to_le32(index->fblock_lo);
+	return (hi << 32) | lo;
+}
+
+/**@brief Store the physical number of the block where the child node is located.
+ * @param index Extent index to update
+ * @param fblock Physical number of the block with the child node */
+static inline void
+ext4_extent_index_set_fblock(struct ext4_extent_index *index, u64int fblock)
+{
+	index->fblock_lo = to_le32(fblock & 0xFFFFFFFF);
+	index->fblock_hi = to_le16((u16int)(fblock >> 32));
+}
+
+/**@brief Read the magic value of an extent header.
+ * @param header Extent header to read
+ * @return Magic value of the extent header */
+static inline u16int ext4_extent_header_get_magic(struct ext4_extent_header *header)
+{
+	u16int magic = to_le16(header->magic);
+	return magic;
+}
+
+/**@brief Store the magic value in an extent header.
+ * @param header Extent header to update
+ * @param magic Magic value to store (ext4_extent_tree_init passes EXT4_EXTENT_MAGIC) */
+static inline void ext4_extent_header_set_magic(struct ext4_extent_header *header,
+		u16int magic)
+{
+	header->magic = to_le16(magic);
+}
+
+/**@brief Read the number of entries recorded in an extent header.
+ * @param header Extent header to read
+ * @return Number of entries covered by the extent header */
+static inline u16int ext4_extent_header_get_nentries(struct ext4_extent_header *header)
+{
+	u16int n = to_le16(header->nentries);
+	return n;
+}
+
+/**@brief Store the number of entries in an extent header.
+ * @param header Extent header to update
+ * @param count Number of entries covered by the extent header */
+static inline void
+ext4_extent_header_set_nentries(struct ext4_extent_header *header, u16int count)
+{
+	u16int raw = to_le16(count);
+	header->nentries = raw;
+}
+
+/**@brief Read the maximum number of entries recorded in an extent header.
+ * @param header Extent header to read
+ * @return Maximum number of entries covered by the extent header */
+static inline u16int ext4_extent_header_get_max_nentries(struct ext4_extent_header *header)
+{
+	u16int cap = to_le16(header->max_nentries);
+	return cap;
+}
+
+/**@brief Store the maximum number of entries in an extent header.
+ * @param header Extent header to update
+ * @param max_count Maximum number of entries covered by the extent header */
+static inline void
+ext4_extent_header_set_max_nentries(struct ext4_extent_header *header, u16int max_count)
+{
+	u16int raw = to_le16(max_count);
+	header->max_nentries = raw;
+}
+
+/**@brief Read the depth of the extent subtree from its header.
+ * @param header Extent header to read
+ * @return Depth of the extent subtree */
+static inline u16int ext4_extent_header_get_depth(struct ext4_extent_header *header)
+{
+	u16int depth = to_le16(header->depth);
+	return depth;
+}
+
+/**@brief Store the depth of the extent subtree in its header.
+ * @param header Extent header to update
+ * @param depth Depth of the extent subtree */
+static inline void
+ext4_extent_header_set_depth(struct ext4_extent_header *header, u16int depth)
+{
+	u16int raw = to_le16(depth);
+	header->depth = raw;
+}
+
+/**@brief Read the generation field of an extent header.
+ * @param header Extent header to read
+ * @return Generation */
+static inline u32int ext4_extent_header_get_generation(struct ext4_extent_header *header)
+{
+	u32int gen = to_le32(header->generation);
+	return gen;
+}
+
+/**@brief Store the generation field of an extent header.
+ * @param header Extent header to update
+ * @param generation Generation */
+static inline void
+ext4_extent_header_set_generation(struct ext4_extent_header *header, u32int generation)
+{
+	u32int raw = to_le32(generation);
+	header->generation = raw;
+}
+
+/******************************************************************************/
+
+/**@brief Set up an empty extent tree root in an i-node and mark the i-node reference dirty.*/
+static inline void ext4_extent_tree_init(struct ext4_inode_ref *inode_ref)
+{
+	struct ext4_extent_header *root;
+	u16int capacity;
+
+	root = ext4_inode_get_extent_header(inode_ref->inode);
+	/* extents that fit in EXT4_INODE_BLOCKS 32-bit slots once the header is taken out */
+	capacity = (EXT4_INODE_BLOCKS * sizeof(u32int) -
+		sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent);
+
+	ext4_extent_header_set_magic(root, EXT4_EXTENT_MAGIC);
+	ext4_extent_header_set_depth(root, 0);
+	ext4_extent_header_set_nentries(root, 0);
+	ext4_extent_header_set_generation(root, 0);
+	ext4_extent_header_set_max_nentries(root, capacity);
+	inode_ref->dirty = true;
+}
+
+
+
+/**@brief Extent-based blockmap manipulation
+ * @param inode_ref I-node
+ * @param iblock starting logical block of the inode
+ * @param max_nblocks maximum number of blocks to get from/allocate to blockmap
+ * @param resfblockp returns the physical block address of the first block of
+ *                   an extent
+ * @param create true if the caller wants to insert a mapping or convert
+ *               an unwritten mapping to a written one
+ * @param resnblocksp returns the number of blocks in an extent (must be smaller
+ *                    than \p max_nblocks)
+ * @return Error code*/
+int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref,
+		ext4_lblk_t iblock,
+		ext4_lblk_t max_nblocks,
+		ext4_fsblk_t *resfblockp,
+		bool create,
+		ext4_lblk_t *resnblocksp);
+
+
+/**@brief Release data blocks in the logical block range given by from and to.
+ * @param inode_ref I-node to release blocks from
+ * @param from First logical block to release
+ * @param to End of the range (NOTE(review): confirm whether it is inclusive)
+ * @return Error code */
+int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref,
+		ext4_lblk_t from, ext4_lblk_t to);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_fs.h
@@ -1,0 +1,222 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+
+struct ext4_fs {
+ bool read_only; /* cf. the read_only argument of ext4_fs_init() */
+
+ struct ext4_blockdev *bdev; /* presumably the device given to ext4_fs_init() — confirm */
+ struct ext4_sblock sb; /* superblock, held by value */
+ /* NOTE(review): presumably per-indirection-level block-map limits — confirm */
+ u64int inode_block_limits[4];
+ u64int inode_blocks_per_level[4];
+
+ u32int last_inode_bg_id;
+ /* Journal state; the jbd_* types are declared in ext4_journal.h. */
+ struct jbd_fs *jbd_fs;
+ struct jbd_journal *jbd_journal;
+ struct jbd_trans *curr_trans;
+};
+
+struct ext4_block_group_ref {
+ struct ext4_block block; /* presumably the block holding the descriptor — confirm */
+ struct ext4_bgroup *block_group;
+ struct ext4_fs *fs;
+ u32int index; /* cf. the bgid argument of ext4_fs_get_block_group_ref() */
+ bool dirty;
+};
+
+struct ext4_inode_ref {
+ struct ext4_block block; /* presumably the block holding the i-node — confirm */
+ struct ext4_inode *inode;
+ struct ext4_fs *fs;
+ u32int index; /* cf. the index argument of ext4_fs_get_inode_ref() */
+ bool dirty; /* set by code that modifies *inode, e.g. ext4_extent_tree_init() */
+};
+
+#pragma incomplete struct ext4_fs
+
+/**@brief Map an absolute block address to its index inside a block group.
+ * @param s Superblock pointer
+ * @param baddr Absolute block number to convert
+ * @return Block index relative to the group (baddr modulo blocks_per_group) */
+static inline u32int ext4_fs_addr_to_idx_bg(struct ext4_sblock *s,
+	ext4_fsblk_t baddr)
+{
+	u32int per_group = ext4_get32(s, blocks_per_group);
+	/* Step back one block when first_data_block is non-zero. */
+	if (baddr != 0 && ext4_get32(s, first_data_block) != 0)
+		baddr--;
+	return baddr % per_group;
+}
+
+/**@brief Convert relative block address in group to absolute address.
+ * @param s Superblock pointer
+ * @param index Relative block address
+ * @param bgid Block group
+ * @return Absolute block address (computed in 64 bits)
+ */
+static inline ext4_fsblk_t ext4_fs_bg_idx_to_addr(struct ext4_sblock *s,
+	u32int index,
+	u32int bgid)
+{
+	if (ext4_get32(s, first_data_block))
+		index++;
+	/* Widen first: a u32int product wraps once blocks_per_group * bgid >= 2^32. */
+	return (u64int)ext4_get32(s, blocks_per_group) * bgid + index;
+}
+
+/**@brief Block address bgid * blocks_per_group + first_data_block, in 64 bits. */
+static inline ext4_fsblk_t ext4_fs_first_bg_block_no(struct ext4_sblock *s,
+	u32int bgid)
+{
+	u64int per_group = ext4_get32(s, blocks_per_group);
+	return per_group * bgid + ext4_get32(s, first_data_block);
+}
+
+/**@brief Initialize filesystem and read all needed data.
+ * @param fs Filesystem instance to be initialized
+ * @param bdev Block device holding the filesystem
+ * @param read_only Mark the filesystem as read-only.
+ * @return Error code
+ */
+int ext4_fs_init(struct ext4_fs *fs, struct ext4_blockdev *bdev,
+ bool read_only);
+
+/**@brief Destroy filesystem instance (used by unmount operation).
+ * @param fs Filesystem to be destroyed
+ * @return Error code
+ */
+int ext4_fs_fini(struct ext4_fs *fs);
+
+/**@brief Check whether the filesystem's features are supported by this driver.
+ * The function can return 0 and set the read_only flag. This means that
+ * there are some unsupported features that could cause problems
+ * during some write operations.
+ * @param fs Filesystem to be checked
+ * @param read_only Flag if filesystem should be mounted only for reading
+ * @return Error code
+ */
+int ext4_fs_check_features(struct ext4_fs *fs, bool *read_only);
+
+/**@brief Get reference to block group specified by index.
+ * @param fs Filesystem to find block group on
+ * @param bgid Index of block group to load
+ * @param ref Output pointer for reference
+ * @return Error code
+ */
+int ext4_fs_get_block_group_ref(struct ext4_fs *fs, u32int bgid,
+ struct ext4_block_group_ref *ref);
+
+/**@brief Put reference to block group.
+ * @param ref Pointer for reference to be put back
+ * @return Error code
+ */
+int ext4_fs_put_block_group_ref(struct ext4_block_group_ref *ref);
+
+/**@brief Get reference to i-node specified by index.
+ * @param fs Filesystem to find i-node on
+ * @param index Index of i-node to load
+ * @param ref Output pointer for reference
+ * @return Error code
+ */
+int ext4_fs_get_inode_ref(struct ext4_fs *fs, u32int index,
+ struct ext4_inode_ref *ref);
+
+/**@brief Reset blocks field of i-node.
+ * @param fs Filesystem to reset blocks field of i-node on
+ * @param inode_ref Reference to the i-node to be operated on
+ */
+void ext4_fs_inode_blocks_init(struct ext4_fs *fs,
+ struct ext4_inode_ref *inode_ref);
+
+/**@brief Put reference to i-node.
+ * @param ref Pointer for reference to be put back
+ * @return Error code
+ */
+int ext4_fs_put_inode_ref(struct ext4_inode_ref *ref);
+
+/**@brief Convert filetype to inode mode.
+ * @param filetype
+ * @return inode mode
+ */
+u32int ext4_fs_correspond_inode_mode(int filetype);
+
+/**@brief Allocate new i-node in the filesystem.
+ * @param fs Filesystem to allocate i-node on
+ * @param inode_ref Output pointer to return reference to allocated i-node
+ * @param filetype File type of newly created i-node
+ * @return Error code
+ */
+int ext4_fs_alloc_inode(struct ext4_fs *fs, struct ext4_inode_ref *inode_ref,
+ int filetype);
+
+/**@brief Release i-node and mark it as free.
+ * @param inode_ref I-node to be released
+ * @return Error code
+ */
+int ext4_fs_free_inode(struct ext4_inode_ref *inode_ref);
+
+/**@brief Truncate i-node data blocks.
+ * @param inode_ref I-node to be truncated
+ * @param new_size New size of inode (must be < current size)
+ * @return Error code
+ */
+int ext4_fs_truncate_inode(struct ext4_inode_ref *inode_ref, u64int new_size);
+
+/**@brief Compute 'goal' for inode index
+ * @param inode_ref Reference to inode, to allocate block for
+ * @return goal
+ */
+ext4_fsblk_t ext4_fs_inode_to_goal_block(struct ext4_inode_ref *inode_ref);
+
+/**@brief Compute 'goal' for allocation algorithm (For blockmap).
+ * @param inode_ref Reference to inode, to allocate block for
+ * @param goal
+ * @return error code
+ */
+int ext4_fs_indirect_find_goal(struct ext4_inode_ref *inode_ref,
+ ext4_fsblk_t *goal);
+
+/**@brief Get physical block address by logical index of the block.
+ * @param inode_ref I-node to read block address from
+ * @param iblock Logical index of block
+ * @param fblock Output pointer for return physical
+ * block address
+ * @param support_unwritten Indicate whether unwritten block range
+ * is supported under the current context
+ * @return Error code
+ */
+int ext4_fs_get_inode_dblk_idx(struct ext4_inode_ref *inode_ref,
+ ext4_lblk_t iblock, ext4_fsblk_t *fblock,
+ bool support_unwritten);
+
+/**@brief Initialize a part of unwritten range of the inode.
+ * @param inode_ref I-node to proceed on.
+ * @param iblock Logical index of block
+ * @param fblock Output pointer for return physical block address
+ * @return Error code
+ */
+int ext4_fs_init_inode_dblk_idx(struct ext4_inode_ref *inode_ref,
+ ext4_lblk_t iblock, ext4_fsblk_t *fblock);
+
+/**@brief Append following logical block to the i-node.
+ * @param inode_ref I-node to append block to
+ * @param fblock Output physical block address of newly allocated block
+ * @param iblock Output logical number of newly allocated block
+ * @return Error code
+ */
+int ext4_fs_append_inode_dblk(struct ext4_inode_ref *inode_ref,
+ ext4_fsblk_t *fblock, ext4_lblk_t *iblock);
+
+/**@brief Increment inode link count.
+ * @param inode_ref I-node handle
+ */
+void ext4_fs_inode_links_count_inc(struct ext4_inode_ref *inode_ref);
+
+/**@brief Decrement inode link count.
+ * @param inode_ref I-node handle
+ */
+void ext4_fs_inode_links_count_dec(struct ext4_inode_ref *inode_ref);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_hash.h
@@ -1,0 +1,22 @@
+#pragma once
+
+#include "ext4_config.h"
+
+struct ext4_hash_info {
+ u32int hash;
+ u32int minor_hash;
+ u32int hash_version; /* cf. the hash_version argument of ext2_htree_hash() */
+ const u32int *seed; /* cf. the hash_seed argument of ext2_htree_hash() */
+};
+
+/**@brief Directory entry name hash function.
+ * @param name entry name
+ * @param len entry name length
+ * @param hash_seed (from superblock)
+ * @param hash_version (from superblock)
+ * @param hash_major output value
+ * @param hash_minor output value
+ * @return standard error code*/
+int ext2_htree_hash(const char *name, int len, const u32int *hash_seed,
+ int hash_version, u32int *hash_major,
+ u32int *hash_minor);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_ialloc.h
@@ -1,0 +1,29 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+
+/**@brief Calculate and set checksum of inode bitmap.
+ * @param sb superblock pointer.
+ * @param bg block group
+ * @param bitmap bitmap buffer
+ */
+void ext4_ialloc_set_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg,
+ void *bitmap);
+
+/**@brief Free i-node number and modify filesystem data structures.
+ * @param fs Filesystem where the i-node is located
+ * @param index Index of i-node to be released
+ * @param is_dir Flag indicating whether the i-node is a directory
+ */
+int ext4_ialloc_free_inode(struct ext4_fs *fs, u32int index, bool is_dir);
+
+/**@brief I-node allocation algorithm.
+ * This is a simpler algorithm than the Orlov allocator used
+ * in the Linux kernel.
+ * @param fs Filesystem to allocate i-node on
+ * @param index Output value - allocated i-node number
+ * @param is_dir Flag indicating whether the allocated i-node will be a directory
+ * @return Error code
+ */
+int ext4_ialloc_alloc_inode(struct ext4_fs *fs, u32int *index, bool is_dir);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_inode.h
@@ -1,0 +1,304 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+
+#pragma incomplete struct ext4_extent_header
+
+/**@brief Get mode of the i-node.
+ * @param sb Superblock
+ * @param inode I-node to load mode from
+ * @return Mode of the i-node
+ */
+u32int ext4_inode_get_mode(struct ext4_sblock *sb, struct ext4_inode *inode);
+
+/**@brief Set mode of the i-node.
+ * @param sb Superblock
+ * @param inode I-node to set mode to
+ * @param mode Mode to set to i-node
+ */
+void ext4_inode_set_mode(struct ext4_sblock *sb, struct ext4_inode *inode,
+ u32int mode);
+
+/**@brief Get ID of the i-node owner (user id).
+ * @param inode I-node to load uid from
+ * @return User ID of the i-node owner
+ */
+u32int ext4_inode_get_uid(struct ext4_inode *inode);
+
+/**@brief Set ID of the i-node owner.
+ * @param inode I-node to set uid to
+ * @param uid ID of the i-node owner
+ */
+void ext4_inode_set_uid(struct ext4_inode *inode, u32int uid);
+
+/**@brief Get real i-node size.
+ * @param sb Superblock
+ * @param inode I-node to load size from
+ * @return Real size of i-node
+ */
+u64int ext4_inode_get_size(struct ext4_sblock *sb, struct ext4_inode *inode);
+
+/**@brief Set real i-node size.
+ * @param inode I-node to set size to
+ * @param size Size of the i-node
+ */
+void ext4_inode_set_size(struct ext4_inode *inode, u64int size);
+
+/**@brief Get time, when i-node was last accessed.
+ * @param inode I-node
+ * @return Time of the last access (POSIX)
+ */
+u32int ext4_inode_get_access_time(struct ext4_inode *inode);
+
+/**@brief Set time, when i-node was last accessed.
+ * @param inode I-node
+ * @param time Time of the last access (POSIX)
+ */
+void ext4_inode_set_access_time(struct ext4_inode *inode, u32int time);
+
+/**@brief Get time, when i-node was last changed.
+ * @param inode I-node
+ * @return Time of the last change (POSIX)
+ */
+u32int ext4_inode_get_change_inode_time(struct ext4_inode *inode);
+
+/**@brief Set time, when i-node was last changed.
+ * @param inode I-node
+ * @param time Time of the last change (POSIX)
+ */
+void ext4_inode_set_change_inode_time(struct ext4_inode *inode, u32int time);
+
+/**@brief Get time, when i-node content was last modified.
+ * @param inode I-node
+ * @return Time of the last content modification (POSIX)
+ */
+u32int ext4_inode_get_modif_time(struct ext4_inode *inode);
+
+/**@brief Set time, when i-node content was last modified.
+ * @param inode I-node
+ * @param time Time of the last content modification (POSIX)
+ */
+void ext4_inode_set_modif_time(struct ext4_inode *inode, u32int time);
+
+/**@brief Get time, when i-node was deleted.
+ * @param inode I-node
+ * @return Time of the delete action (POSIX)
+ */
+u32int ext4_inode_get_del_time(struct ext4_inode *inode);
+
+/**@brief Get time, when i-node was created.
+ * @param inode I-node
+ * @return Time of the create action (POSIX)
+ */
+u32int ext4_inode_get_creation_time(struct ext4_inode *inode);
+
+/**@brief Set time, when i-node was deleted.
+ * @param inode I-node
+ * @param time Time of the delete action (POSIX)
+ */
+void ext4_inode_set_del_time(struct ext4_inode *inode, u32int time);
+
+/**@brief Get ID of the i-node owner's group.
+ * @param inode I-node to load gid from
+ * @return Group ID of the i-node owner
+ */
+u32int ext4_inode_get_gid(struct ext4_inode *inode);
+
+/**@brief Set ID to the i-node owner's group.
+ * @param inode I-node to set gid to
+ * @param gid Group ID of the i-node owner
+ */
+void ext4_inode_set_gid(struct ext4_inode *inode, u32int gid);
+
+/**@brief Get number of links to i-node.
+ * @param inode I-node to load number of links from
+ * @return Number of links to i-node
+ */
+u16int ext4_inode_get_links_cnt(struct ext4_inode *inode);
+
+/**@brief Set number of links to i-node.
+ * @param inode I-node to set number of links to
+ * @param cnt Number of links to i-node
+ */
+void ext4_inode_set_links_cnt(struct ext4_inode *inode, u16int cnt);
+
+/**@brief Get number of 512-bytes blocks used for i-node.
+ * @param sb Superblock
+ * @param inode I-node
+ * @return Number of 512-bytes blocks
+ */
+u64int ext4_inode_get_blocks_count(struct ext4_sblock *sb,
+ struct ext4_inode *inode);
+
+/**@brief Set number of 512-bytes blocks used for i-node.
+ * @param sb Superblock
+ * @param inode I-node
+ * @param cnt Number of 512-byte blocks
+ * @return Error code
+ */
+int ext4_inode_set_blocks_count(struct ext4_sblock *sb,
+ struct ext4_inode *inode, u64int cnt);
+
+/**@brief Get flags (features) of i-node.
+ * @param inode I-node to get flags from
+ * @return Flags (bitmap)
+ */
+u32int ext4_inode_get_flags(struct ext4_inode *inode);
+
+/**@brief Set flags (features) of i-node.
+ * @param inode I-node to set flags to
+ * @param flags Flags to set to i-node
+ */
+void ext4_inode_set_flags(struct ext4_inode *inode, u32int flags);
+
+/**@brief Get file generation (used by NFS).
+ * @param inode I-node
+ * @return File generation
+ */
+u32int ext4_inode_get_generation(struct ext4_inode *inode);
+
+/**@brief Set file generation (used by NFS).
+ * @param inode I-node
+ * @param gen File generation
+ */
+void ext4_inode_set_generation(struct ext4_inode *inode, u32int gen);
+
+/**@brief Get extra I-node size field.
+ * @param sb Superblock
+ * @param inode I-node
+ * @return extra I-node size
+ */
+u16int ext4_inode_get_extra_isize(struct ext4_sblock *sb,
+ struct ext4_inode *inode);
+
+/**@brief Set extra I-node size field.
+ * @param sb Superblock
+ * @param inode I-node
+ * @param size extra I-node size
+ */
+void ext4_inode_set_extra_isize(struct ext4_sblock *sb,
+ struct ext4_inode *inode,
+ u16int size);
+
+/**@brief Get address of block, where are extended attributes located.
+ * @param inode I-node
+ * @param sb Superblock
+ * @return Block address
+ */
+u64int ext4_inode_get_file_acl(struct ext4_inode *inode,
+ struct ext4_sblock *sb);
+
+/**@brief Set address of block, where are extended attributes located.
+ * @param inode I-node
+ * @param sb Superblock
+ * @param acl Block address
+ */
+void ext4_inode_set_file_acl(struct ext4_inode *inode, struct ext4_sblock *sb,
+ u64int acl);
+
+/**@brief Get block address of specified direct block.
+ * @param inode I-node to load block from
+ * @param idx Index of logical block
+ * @return Physical block address
+ */
+u32int ext4_inode_get_direct_block(struct ext4_inode *inode, u32int idx);
+
+/**@brief Set block address of specified direct block.
+ * @param inode I-node to set block address to
+ * @param idx Index of logical block
+ * @param block Physical block address
+ */
+void ext4_inode_set_direct_block(struct ext4_inode *inode, u32int idx,
+ u32int block);
+
+/**@brief Get block address of specified indirect block.
+ * @param inode I-node to get block address from
+ * @param idx Index of indirect block
+ * @return Physical block address
+ */
+u32int ext4_inode_get_indirect_block(struct ext4_inode *inode, u32int idx);
+
+/**@brief Set block address of specified indirect block.
+ * @param inode I-node to set block address to
+ * @param idx Index of indirect block
+ * @param block Physical block address
+ */
+void ext4_inode_set_indirect_block(struct ext4_inode *inode, u32int idx,
+ u32int block);
+
+/**@brief Get device number
+ * @param inode I-node to get device number from
+ * @return Device number
+ */
+u32int ext4_inode_get_dev(struct ext4_inode *inode);
+
+/**@brief Set device number
+ * @param inode I-node to set device number to
+ * @param dev Device number
+ */
+void ext4_inode_set_dev(struct ext4_inode *inode, u32int dev);
+
+/**@brief return the type of i-node
+ * @param sb Superblock
+ * @param inode I-node to return the type of
+ * @return Type of the i-node
+ */
+u32int ext4_inode_type(struct ext4_sblock *sb, struct ext4_inode *inode);
+
+/**@brief Check if i-node has specified type.
+ * @param sb Superblock
+ * @param inode I-node to check type of
+ * @param type Type to check
+ * @return Result of check operation
+ */
+bool ext4_inode_is_type(struct ext4_sblock *sb, struct ext4_inode *inode,
+ u32int type);
+
+/**@brief Check if i-node has specified flag.
+ * @param inode I-node to check flags of
+ * @param f Flag to check
+ * @return Result of check operation
+ */
+bool ext4_inode_has_flag(struct ext4_inode *inode, u32int f);
+
+/**@brief Remove specified flag from i-node.
+ * @param inode I-node to clear flag on
+ * @param f Flag to be cleared
+ */
+void ext4_inode_clear_flag(struct ext4_inode *inode, u32int f);
+
+/**@brief Set specified flag to i-node.
+ * @param inode I-node to set flag on
+ * @param f Flag to be set
+ */
+void ext4_inode_set_flag(struct ext4_inode *inode, u32int f);
+
+/**@brief Get inode checksum(crc32)
+ * @param sb Superblock
+ * @param inode I-node to get checksum value from
+ */
+u32int
+ext4_inode_get_csum(struct ext4_sblock *sb, struct ext4_inode *inode);
+
+/**@brief Set inode checksum(crc32)
+ * @param sb Superblock
+ * @param inode I-node to set checksum value to
+ * @param checksum Checksum value to store */
+void
+ext4_inode_set_csum(struct ext4_sblock *sb, struct ext4_inode *inode,
+ u32int checksum);
+
+/**@brief Check if i-node can be truncated.
+ * @param sb Superblock
+ * @param inode I-node to check
+ * @return Result of the check operation
+ */
+bool ext4_inode_can_truncate(struct ext4_sblock *sb, struct ext4_inode *inode);
+
+/**@brief Get extent header from the root of the extent tree.
+ * @param inode I-node to get extent header from
+ * @return Pointer to extent header of the root node
+ */
+struct ext4_extent_header *
+ext4_inode_get_extent_header(struct ext4_inode *inode);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_journal.h
@@ -1,0 +1,97 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "queue.h"
+#include "tree.h"
+
+struct jbd_fs {
+ struct ext4_blockdev *bdev;
+ struct ext4_inode_ref inode_ref; /* presumably the journal's i-node — confirm */
+ struct jbd_sb sb; /* NOTE(review): presumably read with the jbd_get* macros — confirm */
+
+ bool dirty;
+};
+
+struct jbd_buf {
+ u32int jbd_lba;
+ struct ext4_block block;
+ struct jbd_trans *trans;
+ struct jbd_block_rec *block_rec;
+ TAILQ_ENTRY(jbd_buf) buf_node; /* presumably links into jbd_trans.buf_queue — confirm */
+ TAILQ_ENTRY(jbd_buf) dirty_buf_node; /* presumably links into jbd_block_rec.dirty_buf_queue — confirm */
+};
+
+struct jbd_revoke_rec {
+ ext4_fsblk_t lba; /* cf. the lba argument of jbd_trans_revoke_block() */
+ RB_ENTRY(jbd_revoke_rec) revoke_node; /* presumably a node of jbd_trans.revoke_root — confirm */
+};
+
+struct jbd_block_rec {
+ ext4_fsblk_t lba;
+ struct jbd_trans *trans;
+ RB_ENTRY(jbd_block_rec) block_rec_node; /* presumably a node of jbd_journal.block_rec_root — confirm */
+ LIST_ENTRY(jbd_block_rec) tbrec_node; /* presumably links into jbd_trans.tbrec_list — confirm */
+ TAILQ_HEAD(jbd_buf_dirty, jbd_buf) dirty_buf_queue; /* queue head over struct jbd_buf */
+};
+
+struct jbd_trans {
+ u32int trans_id;
+
+ u32int start_iblock;
+ int alloc_blocks;
+ int data_cnt;
+ u32int data_csum;
+ int written_cnt;
+ int error;
+
+ struct jbd_journal *journal; /* presumably the journal given to jbd_journal_new_trans() — confirm */
+ /* Containers below are declared with the queue.h / tree.h macros. */
+ TAILQ_HEAD(jbd_trans_buf, jbd_buf) buf_queue; /* head over struct jbd_buf */
+ RB_HEAD(jbd_revoke_tree, jbd_revoke_rec) revoke_root; /* head over struct jbd_revoke_rec */
+ LIST_HEAD(jbd_trans_block_rec, jbd_block_rec) tbrec_list; /* head over struct jbd_block_rec */
+ TAILQ_ENTRY(jbd_trans) trans_node; /* presumably links into jbd_journal.cp_queue — confirm */
+};
+
+struct jbd_journal {
+ u32int first;
+ u32int start;
+ u32int last;
+ u32int trans_id;
+ u32int alloc_trans_id;
+
+ u32int block_size;
+
+ TAILQ_HEAD(jbd_cp_queue, jbd_trans) cp_queue; /* head over struct jbd_trans */
+ RB_HEAD(jbd_block, jbd_block_rec) block_rec_root; /* head over struct jbd_block_rec */
+
+ struct jbd_fs *jbd_fs; /* presumably the jbd_fs given to jbd_journal_start() — confirm */
+};
+
+int jbd_get_fs(struct ext4_fs *fs,
+ struct jbd_fs *jbd_fs);
+int jbd_put_fs(struct jbd_fs *jbd_fs);
+int jbd_inode_bmap(struct jbd_fs *jbd_fs,
+ ext4_lblk_t iblock,
+ ext4_fsblk_t *fblock);
+int jbd_recover(struct jbd_fs *jbd_fs);
+int jbd_journal_start(struct jbd_fs *jbd_fs,
+ struct jbd_journal *journal);
+int jbd_journal_stop(struct jbd_journal *journal);
+struct jbd_trans *
+jbd_journal_new_trans(struct jbd_journal *journal);
+int jbd_trans_set_block_dirty(struct jbd_trans *trans,
+ struct ext4_block *block);
+int jbd_trans_revoke_block(struct jbd_trans *trans,
+ ext4_fsblk_t lba);
+int jbd_trans_try_revoke_block(struct jbd_trans *trans,
+ ext4_fsblk_t lba);
+void jbd_journal_free_trans(struct jbd_journal *journal,
+ struct jbd_trans *trans,
+ bool abort);
+int jbd_journal_commit_trans(struct jbd_journal *journal,
+ struct jbd_trans *trans);
+void
+jbd_journal_purge_cp_trans(struct jbd_journal *journal,
+ bool flush,
+ bool once);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_mbr.h
@@ -1,0 +1,22 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_blockdev.h"
+
+/**@brief Master boot record block devices descriptor (output of ext4_mbr_scan())*/
+struct ext4_mbr_bdevs {
+ struct ext4_blockdev partitions[4]; /* presumably one entry per MBR partition slot — confirm */
+};
+
+int ext4_mbr_scan(struct ext4_blockdev *parent, struct ext4_mbr_bdevs *bdevs);
+
+/**@brief Master boot record partitions (input to ext4_mbr_write())*/
+struct ext4_mbr_parts {
+
+ /**@brief Percentage division table, one entry per partition,
+ * e.g. {50, 20, 10, 20}.
+ * Sum of all 4 elements must be <= 100*/
+ u8int division[4];
+};
+
+int ext4_mbr_write(struct ext4_blockdev *parent, struct ext4_mbr_parts *parts, u32int disk_id);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_misc.h
@@ -1,0 +1,87 @@
+#pragma once
+
+#define EXT4_DIV_ROUND_UP(x, y) (((x) + (y) - 1)/(y)) /* x/y rounded up (non-negative x, positive y); y is evaluated twice */
+#define EXT4_ALIGN(x, y) ((y) * EXT4_DIV_ROUND_UP((x), (y))) /* x rounded up to a multiple of y; y is evaluated three times */
+
+/****************************Endian conversion*****************/
+
+static inline u64int reorder64(u64int n)
+{
+	u64int swapped = 0;
+	int i;
+	/* Shift the lowest byte of n onto swapped eight times: reverses byte order. */
+	for (i = 0; i < 8; i++) {
+		swapped = (swapped << 8) | (n & 0xff);
+		n >>= 8;
+	}
+	return swapped;
+}
+
+static inline u32int reorder32(u32int n)
+{
+	u32int upper = ((n & 0xff) << 8) | ((n >> 8) & 0xff);	/* bytes 0,1 swapped */
+	u32int lower = (((n >> 16) & 0xff) << 8) | (n >> 24);	/* bytes 2,3 swapped */
+
+	return (upper << 16) | lower;
+}
+
+static inline u16int reorder16(u16int n)
+{
+	u16int high = n >> 8, low = n & 0xff;
+	return (low << 8) | high;	/* the two bytes exchanged */
+}
+
+#ifdef CONFIG_BIG_ENDIAN
+#define to_le64(_n) reorder64(_n)
+#define to_le32(_n) reorder32(_n)
+#define to_le16(_n) reorder16(_n)
+/* With CONFIG_BIG_ENDIAN the to_be* forms are identity and to_le* swap bytes. */
+#define to_be64(_n) (_n)
+#define to_be32(_n) (_n)
+#define to_be16(_n) (_n)
+
+#else
+#define to_le64(_n) (_n)
+#define to_le32(_n) (_n)
+#define to_le16(_n) (_n)
+/* Without it the roles flip: to_le* are identity and to_be* swap bytes. */
+#define to_be64(_n) reorder64(_n)
+#define to_be32(_n) reorder32(_n)
+#define to_be16(_n) reorder16(_n)
+#endif
+
+/****************************Access macros to ext4 structures*****************/
+
+#define ext4_get32(s, f) to_le32((s)->f)
+#define ext4_get16(s, f) to_le16((s)->f)
+#define ext4_get8(s, f) ((s)->f)
+/* Setters: store v into field f of *s, converted with to_le32/to_le16. */
+#define ext4_set32(s, f, v) \
+	do { \
+		(s)->f = to_le32(v); \
+	} while (0)
+#define ext4_set16(s, f, v) \
+	do { \
+		(s)->f = to_le16(v); \
+	} while (0)
+/* The '(' must touch the macro name, otherwise this is an object-like macro. */
+#define ext4_set8(s, f, v) \
+	do { (s)->f = (v); } while (0)
+
+/****************************Access macros to jbd2 structures*****************/
+
+#define jbd_get32(s, f) to_be32((s)->f)
+#define jbd_get16(s, f) to_be16((s)->f)
+#define jbd_get8(s, f) ((s)->f)
+/* Setters: store v into field f of *s, converted with to_be32/to_be16. */
+#define jbd_set32(s, f, v) \
+	do { \
+		(s)->f = to_be32(v); \
+	} while (0)
+#define jbd_set16(s, f, v) \
+	do { \
+		(s)->f = to_be16(v); \
+	} while (0)
+/* The '(' must touch the macro name, otherwise this is an object-like macro. */
+#define jbd_set8(s, f, v) \
+	do { (s)->f = (v); } while (0)
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_mkfs.h
@@ -1,0 +1,49 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_blockdev.h"
+#include "ext4_fs.h"
+
+struct ext4_mkfs_info {
+ u64int len; /* NOTE(review): presumably the filesystem length in bytes — confirm */
+ u32int block_size;
+ u32int blocks_per_group;
+ u32int inodes_per_group;
+ u32int inode_size;
+ u32int inodes;
+ u32int journal_blocks;
+ u32int feat_ro_compat; /* cf. features_read_only in struct ext4_sblock */
+ u32int feat_compat; /* cf. features_compatible in struct ext4_sblock */
+ u32int feat_incompat; /* cf. features_incompatible in struct ext4_sblock */
+ u32int bg_desc_reserve_blocks;
+ u16int dsc_size; /* presumably the block group descriptor size — confirm */
+ u8int uuid[UUID_SIZE]; /* UUID_SIZE is 16 (ext4_types.h) */
+ bool journal;
+ char label[16];
+};
+
+struct fs_aux_info {
+ struct ext4_sblock *sb; /* NOTE(review): presumably owned here and freed by release_fs_aux_info() — confirm */
+ u8int *bg_desc_blk;
+ struct xattr_list_element *xattrs;
+ u32int first_data_block; /* cf. first_data_block in struct ext4_sblock */
+ u64int len_blocks;
+ u32int inode_table_blocks;
+ u32int groups;
+ u32int bg_desc_blocks;
+ u32int default_i_flags;
+ u32int blocks_per_ind; /* presumably blocks addressable per single/double/triple indirect block — confirm */
+ u32int blocks_per_dind;
+ u32int blocks_per_tind;
+};
+
+int create_fs_aux_info(struct fs_aux_info *aux_info, struct ext4_mkfs_info *info);
+void release_fs_aux_info(struct fs_aux_info *aux_info);
+
+int write_sblocks(struct ext4_blockdev *bd, struct fs_aux_info *aux_info, struct ext4_mkfs_info *info);
+
+int ext4_mkfs_read_info(struct ext4_blockdev *bd, struct ext4_mkfs_info *info);
+
+int ext4_mkfs(struct ext4_fs *fs, struct ext4_blockdev *bd,
+ struct ext4_mkfs_info *info, int fs_type);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_super.h
@@ -1,0 +1,185 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+#include "ext4_misc.h"
+
+/**@brief Read the 64-bit block count held in the superblock.
+ * @param s superblock descriptor
+ * @return count of blocks (blocks_count_hi in the upper 32 bits)*/
+static inline u64int ext4_sb_get_blocks_cnt(struct ext4_sblock *s)
+{
+	u64int upper = to_le32(s->blocks_count_hi);
+	return (upper << 32) | to_le32(s->blocks_count_lo);
+}
+
+/**@brief Store a 64-bit block count into the superblock.
+ * @param s superblock descriptor
+ * @param cnt new count of blocks*/
+static inline void ext4_sb_set_blocks_cnt(struct ext4_sblock *s, u64int cnt)
+{
+	s->blocks_count_hi = to_le32(cnt >> 32);
+	s->blocks_count_lo = to_le32((u32int)cnt);	/* low 32 bits */
+}
+
+/**@brief Read the 64-bit free blocks count held in the superblock.
+ * @param s superblock descriptor
+ * @return free blocks (free_blocks_count_hi in the upper 32 bits)*/
+static inline u64int ext4_sb_get_free_blocks_cnt(struct ext4_sblock *s)
+{
+	u64int upper = to_le32(s->free_blocks_count_hi);
+	return (upper << 32) | to_le32(s->free_blocks_count_lo);
+}
+
+/**@brief Store a 64-bit free blocks count into the superblock.
+ * @param s superblock descriptor
+ * @param cnt new value of free blocks*/
+static inline void ext4_sb_set_free_blocks_cnt(struct ext4_sblock *s,
+	u64int cnt)
+{
+	s->free_blocks_count_hi = to_le32(cnt >> 32);
+	s->free_blocks_count_lo = to_le32((u32int)cnt);	/* low 32 bits */
+}
+
+/**@brief Block size in bytes, computed as 1024 << log_block_size.
+ * @param s superblock descriptor */
+static inline u32int ext4_sb_get_block_size(struct ext4_sblock *s)
+{
+	u32int log2_scale = to_le32(s->log_block_size);
+	return 1024 << log2_scale;
+}
+
+/**@brief Block group descriptor size, never below the minimum.
+ * @param s superblock descriptor
+ * @return desc_size, or EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE if that is larger*/
+static inline u16int ext4_sb_get_desc_size(struct ext4_sblock *s)
+{
+	u16int stored = to_le16(s->desc_size);
+
+	if (stored >= EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE)
+		return stored;
+	return EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE;
+}
+
+/*************************Flags and features*********************************/
+
+/**@brief Support check of flag.
+ * @param s superblock descriptor
+ * @param v flag to check
+ * @return true if any bit of \p v is set in the superblock flags*/
+static inline bool ext4_sb_check_flag(struct ext4_sblock *s, u32int v)
+{
+	return (to_le32(s->flags) & v) != 0;	/* explicit 0/1: does not rely on bool conversion */
+}
+
+/**@brief Support check of feature compatible.
+ * @param s superblock descriptor
+ * @param v feature to check
+ * @return true if any bit of \p v is set in features_compatible*/
+static inline bool ext4_sb_feature_com(struct ext4_sblock *s, u32int v)
+{
+	return (to_le32(s->features_compatible) & v) != 0;	/* explicit 0/1 */
+}
+
+/**@brief Support check of feature incompatible.
+ * @param s superblock descriptor
+ * @param v feature to check
+ * @return true if any bit of \p v is set in features_incompatible*/
+static inline bool ext4_sb_feature_incom(struct ext4_sblock *s, u32int v)
+{
+	return (to_le32(s->features_incompatible) & v) != 0;	/* explicit 0/1 */
+}
+
+/**@brief Support check of read only flag.
+ * @param s superblock descriptor
+ * @param v flag to check
+ * @return true if any bit of \p v is set in features_read_only*/
+static inline bool ext4_sb_feature_ro_com(struct ext4_sblock *s, u32int v)
+{
+	return (to_le32(s->features_read_only) & v) != 0;	/* explicit 0/1 */
+}
+
+/**@brief Flex group id of a block group: block_group >> log_groups_per_flex.
+ * @param s superblock descriptor
+ * @param block_group block group */
+static inline u32int
+ext4_sb_bg_to_flex(struct ext4_sblock *s, u32int block_group)
+{
+	u32int flex_shift = to_le32(s->log_groups_per_flex);
+	return block_group >> flex_shift;
+}
+
+/**@brief Flex block group size, computed as 1 << log_groups_per_flex.
+ * @param s superblock descriptor */
+static inline u32int ext4_sb_flex_bg_size(struct ext4_sblock *s)
+{
+	u32int flex_shift = to_le32(s->log_groups_per_flex);
+	return 1 << flex_shift;
+}
+
+/**@brief First meta block group id, as recorded in the superblock.
+ * @param s superblock descriptor */
+static inline u32int ext4_sb_first_meta_bg(struct ext4_sblock *s)
+{
+	u32int first = to_le32(s->first_meta_bg);
+	return first;
+}
+
+/**************************More complex functions****************************/
+
+/**@brief Returns a block group count.
+ * @param s superblock descriptor
+ * @return count of block groups*/
+u32int ext4_block_group_cnt(struct ext4_sblock *s);
+
+/**@brief Returns block count in block group
+ * (last block group may have less blocks)
+ * @param s superblock descriptor
+ * @param bgid block group id
+ * @return blocks count*/
+u32int ext4_blocks_in_group_cnt(struct ext4_sblock *s, u32int bgid);
+
+/**@brief Returns inodes count in block group
+ * (last block group may have less inodes)
+ * @param s superblock descriptor
+ * @param bgid block group id
+ * @return inodes count*/
+u32int ext4_inodes_in_group_cnt(struct ext4_sblock *s, u32int bgid);
+
+/***************************Read/write/check superblock**********************/
+
+/**@brief Superblock write.
+ * @param bdev block device descriptor.
+ * @param s superblock descriptor
+ * @return Standard error code */
+int ext4_sb_write(struct ext4_blockdev *bdev, struct ext4_sblock *s);
+
+/**@brief Superblock read.
+ * @param bdev block device descriptor.
+ * @param s superblock descriptor
+ * @return Standard error code */
+int ext4_sb_read(struct ext4_blockdev *bdev, struct ext4_sblock *s);
+
+/**@brief Superblock simple validation.
+ * @param s superblock descriptor
+ * @return true if OK*/
+bool ext4_sb_check(struct ext4_sblock *s);
+
+/**@brief Superblock presence in block group.
+ * @param s superblock descriptor
+ * @param block_group block group id
+ * @return true if block group has superblock*/
+bool ext4_sb_is_super_in_bg(struct ext4_sblock *s, u32int block_group);
+
+/**@brief TODO:*/
+bool ext4_sb_sparse(u32int group);
+
+/**@brief TODO:*/
+u32int ext4_bg_num_gdb(struct ext4_sblock *s, u32int group);
+
+/**@brief TODO:*/
+u32int ext4_num_base_meta_clusters(struct ext4_sblock *s,
+ u32int block_group);
+
+/**@brief TODO:*/
+void ext4_sb_set_csum(struct ext4_sblock *s);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_trans.h
@@ -1,0 +1,38 @@
+#pragma once
+
+#include "ext4_config.h"
+#include "ext4_types.h"
+
+/**@brief Mark a buffer dirty and add it to the current transaction.
+ * @param buf buffer
+ * @return standard error code*/
+int ext4_trans_set_block_dirty(struct ext4_buf *buf);
+
+/**@brief Block get function (through cache, don't read).
+ * jbd_trans_get_access would be called in order to
+ * get write access to the buffer.
+ * @param bdev block device descriptor
+ * @param b block descriptor
+ * @param lba logical block address
+ * @return standard error code*/
+int ext4_trans_block_get_noread(struct ext4_blockdev *bdev,
+ struct ext4_block *b,
+ u64int lba);
+
+/**@brief Block get function (through cache).
+ * jbd_trans_get_access would be called in order to
+ * get write access to the buffer.
+ * @param bdev block device descriptor
+ * @param b block descriptor
+ * @param lba logical block address
+ * @return standard error code*/
+int ext4_trans_block_get(struct ext4_blockdev *bdev,
+ struct ext4_block *b,
+ u64int lba);
+
+/**@brief Try to add block to be revoked to the current transaction.
+ * @param bdev block device descriptor
+ * @param lba logical block address
+ * @return standard error code*/
+int ext4_trans_try_revoke_block(struct ext4_blockdev *bdev,
+ u64int lba);
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/ext4_types.h
@@ -1,0 +1,833 @@
+#pragma once
+
+#include "ext4_blockdev.h"
+#include "tree.h"
+
+/*
+ * Block number types: ext4_lblk_t is 32 bits wide, ext4_fsblk_t is 64 bits wide.
+ */
+typedef u32int ext4_lblk_t;
+typedef u64int ext4_fsblk_t;
+
+#define EXT4_CHECKSUM_CRC32C 1
+
+#define UUID_SIZE 16
+
+#pragma pack on
+
+/*
+ * On-disk superblock; packed, its fields total 1024 bytes (EXT4_SUPERBLOCK_SIZE) if uNNint is NN bits wide.
+ */
+struct ext4_sblock {
+ u32int inodes_count; /* I-nodes count */
+ u32int blocks_count_lo; /* Blocks count (low 32 bits) */
+ u32int reserved_blocks_count_lo; /* Reserved blocks count (low 32 bits) */
+ u32int free_blocks_count_lo; /* Free blocks count (low 32 bits) */
+ u32int free_inodes_count; /* Free inodes count */
+ u32int first_data_block; /* First Data Block */
+ u32int log_block_size; /* Block size (log2-encoded, judging by the name) */
+ u32int log_cluster_size; /* Obsoleted fragment size */
+ u32int blocks_per_group; /* Number of blocks per group */
+ u32int frags_per_group; /* Obsoleted fragments per group */
+ u32int inodes_per_group; /* Number of inodes per group */
+ u32int mount_time; /* Mount time */
+ u32int write_time; /* Write time */
+ u16int mount_count; /* Mount count */
+ u16int max_mount_count; /* Maximal mount count */
+ u16int magic; /* Magic signature */
+ u16int state; /* File system state */
+ u16int errors; /* Behavior when detecting errors */
+ u16int minor_rev_level; /* Minor revision level */
+ u32int last_check_time; /* Time of last check */
+ u32int check_interval; /* Maximum time between checks */
+ u32int creator_os; /* Creator OS */
+ u32int rev_level; /* Revision level */
+ u16int def_resuid; /* Default uid for reserved blocks */
+ u16int def_resgid; /* Default gid for reserved blocks */
+
+ /* Fields for EXT4_DYNAMIC_REV superblocks only. */
+ u32int first_inode; /* First non-reserved inode */
+ u16int inode_size; /* Size of inode structure */
+ u16int block_group_index; /* Block group index of this superblock */
+ u32int features_compatible; /* Compatible feature set */
+ u32int features_incompatible; /* Incompatible feature set */
+ u32int features_read_only; /* Readonly-compatible feature set */
+ u8int uuid[UUID_SIZE]; /* 128-bit uuid for volume */
+ char volume_name[16]; /* Volume name */
+ char last_mounted[64]; /* Directory where last mounted */
+ u32int algorithm_usage_bitmap; /* For compression */
+
+ /*
+ * Performance hints. Directory preallocation should only
+ * happen if the EXT4_FCOM_DIR_PREALLOC flag is on.
+ */
+ u8int s_prealloc_blocks; /* Number of blocks to try to preallocate */
+ u8int s_prealloc_dir_blocks; /* Number to preallocate for dirs */
+ u16int s_reserved_gdt_blocks; /* Per group desc for online growth */
+
+ /*
+ * Journaling support valid if EXT4_FCOM_HAS_JOURNAL set.
+ */
+ u8int journal_uuid[UUID_SIZE]; /* UUID of journal superblock */
+ u32int journal_inode_number; /* Inode number of journal file */
+ u32int journal_dev; /* Device number of journal file */
+ u32int last_orphan; /* Head of list of inodes to delete */
+ u32int hash_seed[4]; /* HTREE hash seed */
+ u8int default_hash_version; /* Default hash version to use */
+ u8int journal_backup_type;
+ u16int desc_size; /* Size of group descriptor */
+ u32int default_mount_opts; /* Default mount options */
+ u32int first_meta_bg; /* First metablock block group */
+ u32int mkfs_time; /* When the filesystem was created */
+ u32int journal_blocks[17]; /* Backup of the journal inode */
+
+ /* 64bit support valid if EXT4_FINCOM_64BIT */
+ u32int blocks_count_hi; /* Blocks count (high 32 bits) */
+ u32int reserved_blocks_count_hi; /* Reserved blocks count (high 32 bits) */
+ u32int free_blocks_count_hi; /* Free blocks count (high 32 bits) */
+ u16int min_extra_isize; /* All inodes have at least # bytes */
+ u16int want_extra_isize; /* New inodes should reserve # bytes */
+ u32int flags; /* Miscellaneous flags */
+ u16int raid_stride; /* RAID stride */
+ u16int mmp_interval; /* # seconds to wait in MMP checking */
+ u64int mmp_block; /* Block for multi-mount protection */
+ u32int raid_stripe_width; /* Blocks on all data disks (N * stride) */
+ u8int log_groups_per_flex; /* FLEX_BG group size */
+ u8int checksum_type;
+ u16int reserved_pad;
+ u64int kbytes_written; /* Number of lifetime kilobytes written */
+ u32int snapshot_inum; /* I-node number of active snapshot */
+ u32int snapshot_id; /* Sequential ID of active snapshot */
+ u64int
+ snapshot_r_blocks_count; /* Reserved blocks for active snapshot's
+ future use */
+ u32int
+ snapshot_list; /* I-node number of the head of the on-disk snapshot
+ list */
+ u32int error_count; /* Number of file system errors */
+ u32int first_error_time; /* First time an error happened */
+ u32int first_error_ino; /* I-node involved in first error */
+ u64int first_error_block; /* Block involved in first error */
+ u8int first_error_func[32]; /* Function where the error happened */
+ u32int first_error_line; /* Line number where error happened */
+ u32int last_error_time; /* Most recent time of an error */
+ u32int last_error_ino; /* I-node involved in last error */
+ u32int last_error_line; /* Line number where error happened */
+ u64int last_error_block; /* Block involved in last error */
+ u8int last_error_func[32]; /* Function where the error happened */
+ u8int mount_opts[64];
+ u32int usr_quota_inum; /* inode for tracking user quota */
+ u32int grp_quota_inum; /* inode for tracking group quota */
+ u32int overhead_clusters; /* overhead blocks/clusters in fs */
+ u32int backup_bgs[2]; /* groups with sparse_super2 SBs */
+ u8int encrypt_algos[4]; /* Encryption algorithms in use */
+ u8int encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
+ u32int lpf_ino; /* Location of the lost+found inode */
+ u32int padding[100]; /* Padding to the end of the block */
+ u32int checksum; /* crc32c(superblock) */
+};
+
+#pragma pack off
+
+#define EXT4_SUPERBLOCK_MAGIC 0xEF53
+#define EXT4_SUPERBLOCK_SIZE 1024
+#define EXT4_SUPERBLOCK_OFFSET 1024
+
+#define EXT4_SUPERBLOCK_OS_LINUX 0
+#define EXT4_SUPERBLOCK_OS_HURD 1
+
+/*
+ * Misc. filesystem flags
+ */
+#define EXT4_SUPERBLOCK_FLAGS_SIGNED_HASH 0x0001
+#define EXT4_SUPERBLOCK_FLAGS_UNSIGNED_HASH 0x0002
+#define EXT4_SUPERBLOCK_FLAGS_TEST_FILESYS 0x0004
+/*
+ * Filesystem states
+ */
+#define EXT4_SUPERBLOCK_STATE_VALID_FS 0x0001 /* Unmounted cleanly */
+#define EXT4_SUPERBLOCK_STATE_ERROR_FS 0x0002 /* Errors detected */
+#define EXT4_SUPERBLOCK_STATE_ORPHAN_FS 0x0004 /* Orphans being recovered */
+
+/*
+ * Behaviour when errors are detected (values for ext4_sblock.errors)
+ */
+#define EXT4_SUPERBLOCK_ERRORS_CONTINUE 1 /* Continue execution */
+#define EXT4_SUPERBLOCK_ERRORS_RO 2 /* Remount fs read-only */
+#define EXT4_SUPERBLOCK_ERRORS_PANIC 3 /* Panic */
+#define EXT4_SUPERBLOCK_ERRORS_DEFAULT EXT4_SUPERBLOCK_ERRORS_CONTINUE /* Default policy */
+
+/*
+ * Compatible features
+ */
+#define EXT4_FCOM_DIR_PREALLOC 0x0001
+#define EXT4_FCOM_IMAGIC_INODES 0x0002
+#define EXT4_FCOM_HAS_JOURNAL 0x0004
+#define EXT4_FCOM_EXT_ATTR 0x0008
+#define EXT4_FCOM_RESIZE_INODE 0x0010
+#define EXT4_FCOM_DIR_INDEX 0x0020
+
+/*
+ * Read-only compatible features
+ */
+#define EXT4_FRO_COM_SPARSE_SUPER 0x0001
+#define EXT4_FRO_COM_LARGE_FILE 0x0002
+#define EXT4_FRO_COM_BTREE_DIR 0x0004
+#define EXT4_FRO_COM_HUGE_FILE 0x0008
+#define EXT4_FRO_COM_GDT_CSUM 0x0010
+#define EXT4_FRO_COM_DIR_NLINK 0x0020
+#define EXT4_FRO_COM_EXTRA_ISIZE 0x0040
+#define EXT4_FRO_COM_QUOTA 0x0100
+#define EXT4_FRO_COM_BIGALLOC 0x0200
+#define EXT4_FRO_COM_METADATA_CSUM 0x0400
+
+/*
+ * Incompatible features
+ */
+#define EXT4_FINCOM_COMPRESSION 0x0001
+#define EXT4_FINCOM_FILETYPE 0x0002
+#define EXT4_FINCOM_RECOVER 0x0004 /* Needs recovery */
+#define EXT4_FINCOM_JOURNAL_DEV 0x0008 /* Journal device */
+#define EXT4_FINCOM_META_BG 0x0010
+#define EXT4_FINCOM_EXTENTS 0x0040 /* extents support */
+#define EXT4_FINCOM_64BIT 0x0080
+#define EXT4_FINCOM_MMP 0x0100
+#define EXT4_FINCOM_FLEX_BG 0x0200
+#define EXT4_FINCOM_EA_INODE 0x0400 /* EA in inode */
+#define EXT4_FINCOM_DIRDATA 0x1000 /* data in dirent */
+#define EXT4_FINCOM_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */
+#define EXT4_FINCOM_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
+#define EXT4_FINCOM_INLINE_DATA 0x8000 /* data in inode */
+
+/*
+ * EXT2 supported feature set
+ */
+#define EXT2_SUPPORTED_FCOM 0x0000
+
+#define EXT2_SUPPORTED_FINCOM \
+ (EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG)
+
+#define EXT2_SUPPORTED_FRO_COM \
+ (EXT4_FRO_COM_SPARSE_SUPER | \
+ EXT4_FRO_COM_LARGE_FILE)
+
+/*
+ * EXT3 supported feature set
+ */
+#define EXT3_SUPPORTED_FCOM (EXT4_FCOM_DIR_INDEX)
+
+#define EXT3_SUPPORTED_FINCOM \
+ (EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG)
+
+#define EXT3_SUPPORTED_FRO_COM \
+ (EXT4_FRO_COM_SPARSE_SUPER | EXT4_FRO_COM_LARGE_FILE)
+
+/*
+ * EXT4 supported feature set
+ */
+#define EXT4_SUPPORTED_FCOM (EXT4_FCOM_DIR_INDEX)
+
+#define EXT4_SUPPORTED_FINCOM ( \
+ EXT4_FINCOM_FILETYPE | EXT4_FINCOM_META_BG | \
+ EXT4_FINCOM_EXTENTS | EXT4_FINCOM_FLEX_BG | \
+ EXT4_FINCOM_64BIT \
+)
+
+#define EXT4_SUPPORTED_FRO_COM ( \
+ EXT4_FRO_COM_SPARSE_SUPER | \
+ EXT4_FRO_COM_METADATA_CSUM | \
+ EXT4_FRO_COM_LARGE_FILE | EXT4_FRO_COM_GDT_CSUM | \
+ EXT4_FRO_COM_DIR_NLINK | \
+ EXT4_FRO_COM_EXTRA_ISIZE | EXT4_FRO_COM_HUGE_FILE \
+)
+
+/*Ignored features:
+ * RECOVER - journaling in lwext4 is not supported
+ * (probably won't be ever...)
+ * MMP - multi-mount protection (impossible scenario)
+ * BG_USE_META_CSUM - masked out here as well */
+#define EXT_FINCOM_IGNORED ( \
+ EXT4_FINCOM_RECOVER | \
+ EXT4_FINCOM_MMP | \
+ EXT4_FINCOM_BG_USE_META_CSUM \
+)
+
+/*
+// TODO: Features incompatible to implement
+#define EXT4_SUPPORTED_FINCOM
+ (EXT4_FINCOM_INLINE_DATA)
+
+// TODO: Features read only to implement
+#define EXT4_SUPPORTED_FRO_COM
+ EXT4_FRO_COM_BIGALLOC |\
+ EXT4_FRO_COM_QUOTA)
+*/
+
+
+/* Inode table/bitmap not in use */
+#define EXT4_BLOCK_GROUP_INODE_UNINIT 0x0001
+/* Block bitmap not in use */
+#define EXT4_BLOCK_GROUP_BLOCK_UNINIT 0x0002
+/* On-disk itable initialized to zero */
+#define EXT4_BLOCK_GROUP_ITABLE_ZEROED 0x0004
+
+#pragma pack on
+
+/*
+ * On-disk block group descriptor: 32 bytes through 'checksum', 64 bytes with the *_hi fields.
+ */
+struct ext4_bgroup {
+ u32int block_bitmap_lo; /* Blocks bitmap block */
+ u32int inode_bitmap_lo; /* Inodes bitmap block */
+ u32int inode_table_first_block_lo; /* Inodes table block */
+ u16int free_blocks_count_lo; /* Free blocks count */
+ u16int free_inodes_count_lo; /* Free inodes count */
+ u16int used_dirs_count_lo; /* Directories count */
+ u16int flags; /* EXT4_BLOCK_GROUP_* flags (INODE_UNINIT, etc) */
+ u32int exclude_bitmap_lo; /* Exclude bitmap for snapshots */
+ u16int block_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+bbitmap) LE */
+ u16int inode_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+ibitmap) LE */
+ u16int itable_unused_lo; /* Unused inodes count */
+ u16int checksum; /* crc16(sb_uuid+group+desc) */
+
+ u32int block_bitmap_hi; /* Blocks bitmap block MSB */
+ u32int inode_bitmap_hi; /* I-nodes bitmap block MSB */
+ u32int inode_table_first_block_hi; /* I-nodes table block MSB */
+ u16int free_blocks_count_hi; /* Free blocks count MSB */
+ u16int free_inodes_count_hi; /* Free i-nodes count MSB */
+ u16int used_dirs_count_hi; /* Directories count MSB */
+ u16int itable_unused_hi; /* Unused inodes count MSB */
+ u32int exclude_bitmap_hi; /* Exclude bitmap block MSB */
+ u16int block_bitmap_csum_hi; /* crc32c(s_uuid+grp_num+bbitmap) BE */
+ u16int inode_bitmap_csum_hi; /* crc32c(s_uuid+grp_num+ibitmap) BE */
+ u32int reserved; /* Padding */
+};
+
+#pragma pack off
+
+#define EXT4_MIN_BLOCK_GROUP_DESCRIPTOR_SIZE 32
+#define EXT4_MAX_BLOCK_GROUP_DESCRIPTOR_SIZE 64
+
+#define EXT4_MIN_BLOCK_SIZE 1024 /* 1 KiB */
+#define EXT4_MAX_BLOCK_SIZE 65536 /* 64 KiB */
+#define EXT4_REV0_INODE_SIZE 128
+
+#define EXT4_INODE_BLOCK_SIZE 512
+
+#define EXT4_INODE_DIRECT_BLOCK_COUNT 12
+#define EXT4_INODE_INDIRECT_BLOCK EXT4_INODE_DIRECT_BLOCK_COUNT
+#define EXT4_INODE_DOUBLE_INDIRECT_BLOCK (EXT4_INODE_INDIRECT_BLOCK + 1)
+#define EXT4_INODE_TRIPPLE_INDIRECT_BLOCK (EXT4_INODE_DOUBLE_INDIRECT_BLOCK + 1)
+#define EXT4_INODE_BLOCKS (EXT4_INODE_TRIPPLE_INDIRECT_BLOCK + 1)
+#define EXT4_INODE_INDIRECT_BLOCK_COUNT \
+ (EXT4_INODE_BLOCKS - EXT4_INODE_DIRECT_BLOCK_COUNT)
+
+#pragma pack on
+
+/*
+ * On-disk inode: 128 bytes through 'osd2' (EXT4_GOOD_OLD_INODE_SIZE), extra fields follow.
+ */
+struct ext4_inode {
+ u16int mode; /* File mode */
+ u16int uid; /* Low 16 bits of owner uid */
+ u32int size_lo; /* Size in bytes (low 32 bits) */
+ u32int access_time; /* Access time */
+ u32int change_inode_time; /* I-node change time */
+ u32int modification_time; /* Modification time */
+ u32int deletion_time; /* Deletion time */
+ u16int gid; /* Low 16 bits of group id */
+ u16int links_count; /* Links count */
+ u32int blocks_count_lo; /* Blocks count (low 32 bits) */
+ u32int flags; /* File flags (EXT4_INODE_FLAG_*) */
+ u32int unused_osd1; /* OS dependent - not used in HelenOS */
+ u32int blocks[EXT4_INODE_BLOCKS]; /* Pointers to blocks */
+ u32int generation; /* File version (for NFS) */
+ u32int file_acl_lo; /* File ACL (low 32 bits) */
+ u32int size_hi; /* Size in bytes (high 32 bits) */
+ u32int obso_faddr; /* Obsoleted fragment address */
+
+ union {
+ struct {
+ u16int blocks_high;
+ u16int file_acl_high;
+ u16int uid_high;
+ u16int gid_high;
+ u16int checksum_lo; /* crc32c(uuid+inum+inode) LE */
+ u16int reserved2;
+ } linux2;
+ struct {
+ u16int reserved1;
+ u16int mode_high;
+ u16int uid_high;
+ u16int gid_high;
+ u32int author;
+ } hurd2;
+ } osd2;
+
+ u16int extra_isize;
+ u16int checksum_hi; /* crc32c(uuid+inum+inode) BE */
+ u32int ctime_extra; /* Extra change time (nsec << 2 | epoch) */
+ u32int mtime_extra; /* Extra Modification time (nsec << 2 | epoch) */
+ u32int atime_extra; /* Extra Access time (nsec << 2 | epoch) */
+ u32int crtime; /* File creation time */
+ u32int
+ crtime_extra; /* Extra file creation time (nsec << 2 | epoch) */
+ u32int version_hi; /* High 32 bits for 64-bit version */
+};
+
+#pragma pack off
+
+#define EXT4_INODE_MODE_FIFO 0x1000
+#define EXT4_INODE_MODE_CHARDEV 0x2000
+#define EXT4_INODE_MODE_DIRECTORY 0x4000
+#define EXT4_INODE_MODE_BLOCKDEV 0x6000
+#define EXT4_INODE_MODE_FILE 0x8000
+#define EXT4_INODE_MODE_SOFTLINK 0xA000
+#define EXT4_INODE_MODE_SOCKET 0xC000
+#define EXT4_INODE_MODE_TYPE_MASK 0xF000
+
+/*
+ * Inode flags
+ */
+#define EXT4_INODE_FLAG_SECRM 0x00000001 /* Secure deletion */
+#define EXT4_INODE_FLAG_UNRM 0x00000002 /* Undelete */
+#define EXT4_INODE_FLAG_COMPR 0x00000004 /* Compress file */
+#define EXT4_INODE_FLAG_SYNC 0x00000008 /* Synchronous updates */
+#define EXT4_INODE_FLAG_IMMUTABLE 0x00000010 /* Immutable file */
+#define EXT4_INODE_FLAG_APPEND 0x00000020 /* writes to file may only append */
+#define EXT4_INODE_FLAG_NODUMP 0x00000040 /* do not dump file */
+#define EXT4_INODE_FLAG_NOATIME 0x00000080 /* do not update atime */
+
+/* Compression flags */
+#define EXT4_INODE_FLAG_DIRTY 0x00000100
+#define EXT4_INODE_FLAG_COMPRBLK \
+ 0x00000200 /* One or more compressed clusters */
+#define EXT4_INODE_FLAG_NOCOMPR 0x00000400 /* Don't compress */
+#define EXT4_INODE_FLAG_ECOMPR 0x00000800 /* Compression error */
+
+#define EXT4_INODE_FLAG_INDEX 0x00001000 /* hash-indexed directory */
+#define EXT4_INODE_FLAG_IMAGIC 0x00002000 /* AFS directory */
+#define EXT4_INODE_FLAG_JOURNAL_DATA \
+ 0x00004000 /* File data should be journaled */
+#define EXT4_INODE_FLAG_NOTAIL 0x00008000 /* File tail should not be merged */
+#define EXT4_INODE_FLAG_DIRSYNC \
+ 0x00010000 /* Dirsync behaviour (directories only) */
+#define EXT4_INODE_FLAG_TOPDIR 0x00020000 /* Top of directory hierarchies */
+#define EXT4_INODE_FLAG_HUGE_FILE 0x00040000 /* Set to each huge file */
+#define EXT4_INODE_FLAG_EXTENTS 0x00080000 /* Inode uses extents */
+#define EXT4_INODE_FLAG_EA_INODE 0x00200000 /* Inode used for large EA */
+#define EXT4_INODE_FLAG_EOFBLOCKS 0x00400000 /* Blocks allocated beyond EOF */
+#define EXT4_INODE_FLAG_RESERVED 0x80000000 /* reserved for ext4 lib */
+
+#define EXT4_INODE_ROOT_INDEX 2
+
+
+#define EXT4_DIRECTORY_FILENAME_LEN 255
+
+/**@brief Directory entry types. */
+enum { EXT4_DE_UNKNOWN = 0,
+ EXT4_DE_REG_FILE,
+ EXT4_DE_DIR,
+ EXT4_DE_CHRDEV,
+ EXT4_DE_BLKDEV,
+ EXT4_DE_FIFO,
+ EXT4_DE_SOCK,
+ EXT4_DE_SYMLINK };
+
+#define EXT4_DIRENTRY_DIR_CSUM 0xDE
+
+#pragma pack on
+
+union ext4_dir_en_internal {
+ u8int name_length_high; /* Higher 8 bits of name length */
+ u8int inode_type; /* Type of referenced inode (in rev >= 0.5) */
+};
+
+/**
+ * Linked list directory entry structure
+ */
+struct ext4_dir_en {
+ u32int inode; /* I-node for the entry */
+ u16int entry_len; /* Distance to the next directory entry */
+ u8int name_len; /* Lower 8 bits of name length */
+
+ union ext4_dir_en_internal in;
+ u8int name[]; /* Entry name */
+};
+
+/* Structures for indexed directory */
+
+struct ext4_dir_idx_climit {
+ u16int limit;
+ u16int count;
+};
+
+struct ext4_dir_idx_dot_en {
+ u32int inode;
+ u16int entry_length;
+ u8int name_length;
+ u8int inode_type;
+ u8int name[4];
+};
+
+struct ext4_dir_idx_rinfo {
+ u32int reserved_zero;
+ u8int hash_version;
+ u8int info_length;
+ u8int indirect_levels;
+ u8int unused_flags;
+};
+
+struct ext4_dir_idx_entry {
+ u32int hash;
+ u32int block;
+};
+
+struct ext4_dir_idx_root {
+ struct ext4_dir_idx_dot_en dots[2];
+ struct ext4_dir_idx_rinfo info;
+ struct ext4_dir_idx_entry en[];
+};
+
+struct ext4_fake_dir_entry {
+ u32int inode;
+ u16int entry_length;
+ u8int name_length;
+ u8int inode_type;
+};
+
+struct ext4_dir_idx_node {
+ struct ext4_fake_dir_entry fake;
+ struct ext4_dir_idx_entry entries[];
+};
+
+/*
+ * This goes at the end of each htree block.
+ */
+struct ext4_dir_idx_tail {
+ u32int reserved;
+ u32int checksum; /* crc32c(uuid+inum+dirblock) */
+};
+
+/*
+ * This is a bogus directory entry at the end of each leaf block that
+ * records checksums.
+ */
+struct ext4_dir_entry_tail {
+ u32int reserved_zero1; /* Pretend to be unused */
+ u16int rec_len; /* 12 */
+ u8int reserved_zero2; /* Zero name length */
+ u8int reserved_ft; /* 0xDE, fake file type */
+ u32int checksum; /* crc32c(uuid+inum+dirblock) */
+};
+
+#pragma pack off
+
+#define EXT4_DIRENT_TAIL(block, blocksize) \
+ ((struct ext4_dir_entry_tail *)(((char *)(block)) + ((blocksize) - \
+ sizeof(struct ext4_dir_entry_tail))))
+
+#define EXT4_ERR_BAD_DX_DIR (-25000)
+#define EXT4_ERR_NOT_FOUND (-25001)
+
+#define EXT4_LINK_MAX 65000
+
+#define EXT4_BAD_INO 1
+#define EXT4_ROOT_INO 2
+#define EXT4_BOOT_LOADER_INO 5
+#define EXT4_UNDEL_DIR_INO 6
+#define EXT4_RESIZE_INO 7
+#define EXT4_JOURNAL_INO 8
+
+#define EXT4_GOOD_OLD_FIRST_INO 11
+
+#pragma pack on
+
+/*
+ * This is the extent tail on-disk structure.
+ * All other extent structures are 12 bytes long. It turns out that
+ * block size % 12 >= 4 for at least all powers of 2 greater than 512, which
+ * covers all valid ext4 block sizes. Therefore, this tail structure can be
+ * crammed into the end of the block without having to rebalance the tree.
+ */
+struct ext4_extent_tail
+{
+ u32int checksum; /* crc32c(uuid+inum+extent_block) */
+};
+
+/*
+ * This is the extent on-disk structure.
+ * It's used at the bottom of the tree.
+ */
+struct ext4_extent {
+ u32int iblock; /* First logical block extent covers */
+ u16int nblocks; /* Number of blocks covered by extent */
+ u16int fblock_hi; /* High 16 bits of physical block */
+ u32int fblock_lo; /* Low 32 bits of physical block */
+};
+
+/*
+ * This is the on-disk index structure.
+ * It's used at all the levels except the bottom.
+ */
+struct ext4_extent_index {
+ u32int iblock; /* Index covers logical blocks from 'iblock' */
+
+ /**
+ * Physical block of the next level (a leaf or
+ * another index could be there), stored as the
+ * low 32 bits followed by the high 16 bits.
+ */
+ u32int fblock_lo;
+ u16int fblock_hi;
+ u16int padding;
+};
+
+/*
+ * Each block (leaves and indexes), even inode-stored has header.
+ */
+struct ext4_extent_header {
+ u16int magic;
+ u16int nentries; /* Number of valid entries */
+ u16int max_nentries; /* Capacity of store in entries */
+ u16int depth; /* Has tree real underlying blocks? */
+ u32int generation; /* generation of the tree */
+};
+
+#pragma pack off
+
+#define EXT4_EXTENT_MAGIC 0xF30A
+
+/******************************************************************************/
+
+/* EXT3 HTree directory indexing */
+#define EXT2_HTREE_LEGACY 0
+#define EXT2_HTREE_HALF_MD4 1
+#define EXT2_HTREE_TEA 2
+#define EXT2_HTREE_LEGACY_UNSIGNED 3
+#define EXT2_HTREE_HALF_MD4_UNSIGNED 4
+#define EXT2_HTREE_TEA_UNSIGNED 5
+
+#define EXT2_HTREE_EOF 0x7FFFFFFFUL
+
+#define EXT4_GOOD_OLD_INODE_SIZE 128
+
+/*****************************************************************************/
+
+/*
+ * JBD stores integers in big endian.
+ */
+
+#define JBD_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */
+
+/*
+ * Descriptor block types:
+ */
+
+#define JBD_DESCRIPTOR_BLOCK 1
+#define JBD_COMMIT_BLOCK 2
+#define JBD_SUPERBLOCK 3
+#define JBD_SUPERBLOCK_V2 4
+#define JBD_REVOKE_BLOCK 5
+
+#pragma pack on
+
+/*
+ * Standard header for all descriptor blocks:
+ */
+struct jbd_bhdr {
+ u32int magic;
+ u32int blocktype;
+ u32int sequence;
+};
+
+#pragma pack off
+
+/*
+ * Checksum types.
+ */
+#define JBD_CRC32_CHKSUM 1
+#define JBD_MD5_CHKSUM 2
+#define JBD_SHA1_CHKSUM 3
+#define JBD_CRC32C_CHKSUM 4
+
+#define JBD_CRC32_CHKSUM_SIZE 4
+
+#define JBD_CHECKSUM_BYTES (32 / sizeof(u32int)) /* element count of the u32int chksum[] array, not a byte count */
+
+#pragma pack on
+
+/*
+ * Commit block header for storing transactional checksums:
+ *
+ * NOTE: If JBD_FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the chksum*
+ * fields are used to store a checksum of the descriptor and data blocks.
+ *
+ * If JBD_FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the chksum
+ * field is used to store crc32c(uuid+commit_block). Each journal metadata
+ * block gets its own checksum, and data block checksums are stored in
+ * struct jbd_block_tag (in the descriptor). The other chksum* fields are
+ * not used.
+ *
+ * If JBD_FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses
+ * struct jbd_block_tag3 to store a full 32-bit checksum. Everything else
+ * is the same as v2.
+ *
+ * Checksum v1, v2, and v3 are mutually exclusive features.
+ */
+
+struct jbd_commit_header {
+ struct jbd_bhdr header;
+ u8int chksum_type;
+ u8int chksum_size;
+ u8int padding[2];
+ u32int chksum[JBD_CHECKSUM_BYTES];
+ u64int commit_sec;
+ u32int commit_nsec;
+};
+
+/*
+ * The block tags: each one describes a single buffer in the journal
+ */
+struct jbd_block_tag3 {
+ u32int blocknr; /* The on-disk block number */
+ u32int flags; /* JBD_FLAG_* bits */
+ u32int blocknr_high; /* most-significant high 32bits. */
+ u32int checksum; /* crc32c(uuid+seq+block) */
+};
+
+struct jbd_block_tag {
+ u32int blocknr; /* The on-disk block number */
+ u16int checksum; /* truncated crc32c(uuid+seq+block) */
+ u16int flags; /* JBD_FLAG_* bits */
+ u32int blocknr_high; /* most-significant high 32bits. */
+};
+
+#pragma pack off
+
+/* Definitions for the journal tag flags word: */
+#define JBD_FLAG_ESCAPE 1 /* on-disk block is escaped */
+#define JBD_FLAG_SAME_UUID 2 /* block has same uuid as previous */
+#define JBD_FLAG_DELETED 4 /* block deleted by this transaction */
+#define JBD_FLAG_LAST_TAG 8 /* last tag in this descriptor block */
+
+#pragma pack on
+
+/* Tail of descriptor block, for checksumming */
+struct jbd_block_tail {
+ u32int checksum;
+};
+
+/*
+ * The revoke descriptor: used on disk to describe a series of blocks to
+ * be revoked from the log
+ */
+struct jbd_revoke_header {
+ struct jbd_bhdr header;
+ u32int count; /* Count of bytes used in the block */
+};
+
+/* Tail of revoke block, for checksumming */
+struct jbd_revoke_tail {
+ u32int checksum;
+};
+
+#pragma pack off
+
+#define JBD_USERS_MAX 48
+#define JBD_USERS_SIZE (UUID_SIZE * JBD_USERS_MAX)
+
+#pragma pack on
+
+/*
+ * The journal superblock. All fields are in big-endian byte order.
+ */
+struct jbd_sb {
+/* 0x0000 */
+ struct jbd_bhdr header;
+
+/* 0x000C */
+ /* Static information describing the journal */
+ u32int blocksize; /* journal device blocksize */
+ u32int maxlen; /* total blocks in journal file */
+ u32int first; /* first block of log information */
+
+/* 0x0018 */
+ /* Dynamic information describing the current state of the log */
+ u32int sequence; /* first commit ID expected in log */
+ u32int start; /* blocknr of start of log */
+
+/* 0x0020 */
+ /* Error value, as set by journal_abort(). */
+ s32int error_val;
+
+/* 0x0024 */
+ /* Remaining fields are only valid in a version-2 superblock */
+ u32int feature_compat; /* compatible feature set */
+ u32int feature_incompat; /* incompatible feature set */
+ u32int feature_ro_compat; /* readonly-compatible feature set */
+/* 0x0030 */
+ u8int uuid[UUID_SIZE]; /* 128-bit uuid for journal */
+
+/* 0x0040 */
+ u32int nr_users; /* Nr of filesystems sharing log */
+
+ u32int dynsuper; /* Blocknr of dynamic superblock copy*/
+
+/* 0x0048 */
+ u32int max_transaction; /* Limit of journal blocks per trans.*/
+ u32int max_trandata; /* Limit of data blocks per trans. */
+
+/* 0x0050 */
+ u8int checksum_type; /* checksum type */
+ u8int padding2[3];
+ u32int padding[42];
+ u32int checksum; /* crc32c(superblock) */
+
+/* 0x0100 */
+ u8int users[JBD_USERS_SIZE]; /* ids of all fs'es sharing the log */
+
+/* 0x0400 */
+};
+
+#pragma pack off
+
+#define JBD_SUPERBLOCK_SIZE sizeof(struct jbd_sb)
+
+#define JBD_HAS_COMPAT_FEATURE(jsb,mask) /* feature words are valid only in a v2 superblock */ \
+ ((jsb)->header.blocktype == to_be32(JBD_SUPERBLOCK_V2) && \
+ ((jsb)->feature_compat & to_be32((mask))))
+#define JBD_HAS_RO_COMPAT_FEATURE(jsb,mask) /* compared in on-disk (big-endian) form */ \
+ ((jsb)->header.blocktype == to_be32(JBD_SUPERBLOCK_V2) && \
+ ((jsb)->feature_ro_compat & to_be32((mask))))
+#define JBD_HAS_INCOMPAT_FEATURE(jsb,mask) /* equality, not ordering: byte-swapped values do not order */ \
+ ((jsb)->header.blocktype == to_be32(JBD_SUPERBLOCK_V2) && \
+ ((jsb)->feature_incompat & to_be32((mask))))
+
+#define JBD_FEATURE_COMPAT_CHECKSUM 0x00000001
+
+#define JBD_FEATURE_INCOMPAT_REVOKE 0x00000001
+#define JBD_FEATURE_INCOMPAT_64BIT 0x00000002
+#define JBD_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004
+#define JBD_FEATURE_INCOMPAT_CSUM_V2 0x00000008
+#define JBD_FEATURE_INCOMPAT_CSUM_V3 0x00000010
+
+/* Features known to this implementation: */
+#define JBD_KNOWN_COMPAT_FEATURES 0
+#define JBD_KNOWN_ROCOMPAT_FEATURES 0
+#define JBD_KNOWN_INCOMPAT_FEATURES (JBD_FEATURE_INCOMPAT_REVOKE|\
+ JBD_FEATURE_INCOMPAT_ASYNC_COMMIT|\
+ JBD_FEATURE_INCOMPAT_64BIT|\
+ JBD_FEATURE_INCOMPAT_CSUM_V2|\
+ JBD_FEATURE_INCOMPAT_CSUM_V3)
+
+/*****************************************************************************/
+
+#define EXT4_CRC32_INIT (0xFFFFFFFFUL)
+
+/*****************************************************************************/
+
+#define ext4_malloc malloc
+#define ext4_calloc calloc
+#define ext4_realloc realloc
+#define ext4_free free
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/queue.h
@@ -1,0 +1,612 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)queue.h 8.5 (Berkeley) 8/20/94
+ * $FreeBSD$
+ */
+
+#pragma once
+
+/*
+ * This file defines four types of data structures: singly-linked lists,
+ * singly-linked tail queues, lists and tail queues.
+ *
+ * A singly-linked list is headed by a single forward pointer. The elements
+ * are singly linked for minimum space and pointer manipulation overhead at
+ * the expense of O(n) removal for arbitrary elements. New elements can be
+ * added to the list after an existing element or at the head of the list.
+ * Elements being removed from the head of the list should use the explicit
+ * macro for this purpose for optimum efficiency. A singly-linked list may
+ * only be traversed in the forward direction. Singly-linked lists are ideal
+ * for applications with large datasets and few or no removals or for
+ * implementing a LIFO queue.
+ *
+ * A singly-linked tail queue is headed by a pair of pointers, one to the
+ * head of the list and the other to the tail of the list. The elements are
+ * singly linked for minimum space and pointer manipulation overhead at the
+ * expense of O(n) removal for arbitrary elements. New elements can be added
+ * to the list after an existing element, at the head of the list, or at the
+ * end of the list. Elements being removed from the head of the tail queue
+ * should use the explicit macro for this purpose for optimum efficiency.
+ * A singly-linked tail queue may only be traversed in the forward direction.
+ * Singly-linked tail queues are ideal for applications with large datasets
+ * and few or no removals or for implementing a FIFO queue.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may be traversed in either direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ *
+ *
+ * SLIST LIST STAILQ TAILQ
+ * _HEAD + + + +
+ * _HEAD_INITIALIZER + + + +
+ * _ENTRY + + + +
+ * _INIT + + + +
+ * _EMPTY + + + +
+ * _FIRST + + + +
+ * _NEXT + + + +
+ * _PREV - + - +
+ * _LAST - - + +
+ * _FOREACH + + + +
+ * _FOREACH_FROM + + + +
+ * _FOREACH_SAFE + + + +
+ * _FOREACH_FROM_SAFE + + + +
+ * _FOREACH_REVERSE - - - +
+ * _FOREACH_REVERSE_FROM - - - +
+ * _FOREACH_REVERSE_SAFE - - - +
+ * _FOREACH_REVERSE_FROM_SAFE - - - +
+ * _INSERT_HEAD + + + +
+ * _INSERT_BEFORE - + - +
+ * _INSERT_AFTER + + + +
+ * _INSERT_TAIL - - + +
+ * _CONCAT - - + +
+ * _REMOVE_AFTER + - + -
+ * _REMOVE_HEAD + - + -
+ * _REMOVE + + + +
+ * _SWAP + + + +
+ *
+ */
+#define QMD_TRACE_ELEM(elem) /* queue debug/trace hooks: these six definitions all expand to nothing */
+#define QMD_TRACE_HEAD(head)
+#define QMD_SAVELINK(name, link)
+#define TRACEBUF
+#define TRACEBUF_INITIALIZER
+#define TRASHIT(x)
+
+/*
+ * Singly-linked List declarations.
+ */
+#define SLIST_HEAD(name, type) /* declares struct name: the list head */ \
+struct name { \
+ struct type *slh_first; /* first element */ \
+}
+/* Static initializer for an empty head; the head argument is unused. */
+#define SLIST_HEAD_INITIALIZER(head) \
+ { nil }
+/* Anonymous link struct to embed in each element as the `field` member. */
+#define SLIST_ENTRY(type) \
+struct { \
+ struct type *sle_next; /* next element */ \
+}
+
+/*
+ * Singly-linked List functions.
+ */
+#define SLIST_EMPTY(head) ((head)->slh_first == nil) /* true when the list has no elements */
+/* First element, or nil when empty; expands to an lvalue that other macros assign through. */
+#define SLIST_FIRST(head) ((head)->slh_first)
+/* Forward traversal with var as the cursor; the loop ends when var becomes nil. */
+#define SLIST_FOREACH(var, head, field) \
+ for ((var) = SLIST_FIRST((head)); \
+ (var) != nil; \
+ (var) = SLIST_NEXT((var), field))
+/* As SLIST_FOREACH, but starts at var when var is non-nil, otherwise at the first element. */
+#define SLIST_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) != nil ? (var) : SLIST_FIRST((head))); \
+ (var) != nil; \
+ (var) = SLIST_NEXT((var), field))
+/* Traversal that caches the successor in tvar before the body runs, so the body may unlink var. */
+#define SLIST_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = SLIST_FIRST((head)); \
+ (var) != nil && ((tvar) = SLIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+/* The _FROM starting rule combined with the _SAFE successor caching. */
+#define SLIST_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) != nil ? (var) : SLIST_FIRST((head))); \
+ (var) != nil && ((tvar) = SLIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+/* Traversal that also maintains varp, the address of the pointer that currently points at var. */
+#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \
+ for ((varp) = &SLIST_FIRST((head)); \
+ ((var) = *(varp)) != nil; \
+ (varp) = &SLIST_NEXT((var), field))
+/* Reset head to the empty list. */
+#define SLIST_INIT(head) do { \
+ SLIST_FIRST((head)) = nil; \
+} while (0)
+/* Link elm immediately after slistelm. */
+#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \
+ SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \
+ SLIST_NEXT((slistelm), field) = (elm); \
+} while (0)
+/* Push elm onto the front of the list. */
+#define SLIST_INSERT_HEAD(head, elm, field) do { \
+ SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \
+ SLIST_FIRST((head)) = (elm); \
+} while (0)
+/* Successor link of elm; expands to an lvalue. */
+#define SLIST_NEXT(elm, field) ((elm)->field.sle_next)
+/* Unlink elm, walking from the head to find its predecessor; the walk does not check for nil, so elm must be on the list. */
+#define SLIST_REMOVE(head, elm, type, field) do { \
+ QMD_SAVELINK(oldnext, (elm)->field.sle_next); /* expands to nothing here */ \
+ if (SLIST_FIRST((head)) == (elm)) { \
+ SLIST_REMOVE_HEAD((head), field); \
+ } \
+ else { \
+ struct type *curelm = SLIST_FIRST((head)); \
+ while (SLIST_NEXT(curelm, field) != (elm)) \
+ curelm = SLIST_NEXT(curelm, field); \
+ SLIST_REMOVE_AFTER(curelm, field); \
+ } \
+ TRASHIT(*oldnext); /* expands to nothing here */ \
+} while (0)
+/* Unlink the element following elm; that successor is dereferenced, so it must exist. */
+#define SLIST_REMOVE_AFTER(elm, field) do { \
+ SLIST_NEXT(elm, field) = \
+ SLIST_NEXT(SLIST_NEXT(elm, field), field); \
+} while (0)
+/* Pop the first element; it is dereferenced, so the list must be non-empty. */
+#define SLIST_REMOVE_HEAD(head, field) do { \
+ SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \
+} while (0)
+/* Exchange the contents of two list heads. */
+#define SLIST_SWAP(head1, head2, type) do { \
+ struct type *swap_first = SLIST_FIRST(head1); \
+ SLIST_FIRST(head1) = SLIST_FIRST(head2); \
+ SLIST_FIRST(head2) = swap_first; \
+} while (0)
+
+/*
+ * Singly-linked Tail queue declarations.
+ */
+#define STAILQ_HEAD(name, type) /* declares struct name: first pointer plus address of the last next link */ \
+struct name { \
+ struct type *stqh_first;/* first element */ \
+ struct type **stqh_last;/* addr of last next element */ \
+}
+/* Static initializer for an empty queue: stqh_last points at the head's own stqh_first. */
+#define STAILQ_HEAD_INITIALIZER(head) \
+ { nil, &(head).stqh_first }
+/* Anonymous link struct to embed in each element as the `field` member. */
+#define STAILQ_ENTRY(type) \
+struct { \
+ struct type *stqe_next; /* next element */ \
+}
+
+/*
+ * Singly-linked Tail queue functions.
+ */
+#define STAILQ_CONCAT(head1, head2) do { /* move every element of head2 onto the end of head1 */ \
+ if (!STAILQ_EMPTY((head2))) { \
+ *(head1)->stqh_last = (head2)->stqh_first; \
+ (head1)->stqh_last = (head2)->stqh_last; \
+ STAILQ_INIT((head2)); /* head2 is left empty */ \
+ } \
+} while (0)
+/* True when the queue has no elements. */
+#define STAILQ_EMPTY(head) ((head)->stqh_first == nil)
+/* First element, or nil when empty; expands to an lvalue. */
+#define STAILQ_FIRST(head) ((head)->stqh_first)
+/* Forward traversal with var as the cursor; the loop ends when var becomes nil. */
+#define STAILQ_FOREACH(var, head, field) \
+ for ((var) = STAILQ_FIRST((head)); \
+ (var) != nil; \
+ (var) = STAILQ_NEXT((var), field))
+/* As STAILQ_FOREACH, but starts at var when var is non-nil, otherwise at the first element. */
+#define STAILQ_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) != nil ? (var) : STAILQ_FIRST((head))); \
+ (var) != nil; \
+ (var) = STAILQ_NEXT((var), field))
+/* Traversal that caches the successor in tvar before the body runs, so the body may unlink var. */
+#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = STAILQ_FIRST((head)); \
+ (var) != nil && ((tvar) = STAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+/* The _FROM starting rule combined with the _SAFE successor caching. */
+#define STAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) != nil ? (var) : STAILQ_FIRST((head))); \
+ (var) != nil && ((tvar) = STAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+/* Reset head to the empty queue; stqh_last points back at stqh_first. */
+#define STAILQ_INIT(head) do { \
+ STAILQ_FIRST((head)) = nil; \
+ (head)->stqh_last = &STAILQ_FIRST((head)); \
+} while (0)
+/* Link elm after tqelm; if elm ends up last, the head's tail link is moved to elm's next field. */
+#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \
+ if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == nil)\
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+ STAILQ_NEXT((tqelm), field) = (elm); \
+} while (0)
+/* Insert elm at the front; when the queue was empty elm also becomes the tail. */
+#define STAILQ_INSERT_HEAD(head, elm, field) do { \
+ if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == nil) \
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+ STAILQ_FIRST((head)) = (elm); \
+} while (0)
+/* Append elm: store it through the tail link, then advance the tail link to elm's next field. */
+#define STAILQ_INSERT_TAIL(head, elm, field) do { \
+ STAILQ_NEXT((elm), field) = nil; \
+ *(head)->stqh_last = (elm); \
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+} while (0)
+/* Last element or nil; steps back from the tail link to its element via offsetof, since __containerof is not defined in this header. */
+#define STAILQ_LAST(head, type, field) \
+ (STAILQ_EMPTY((head)) ? nil : \
+ (struct type *)((char *)(head)->stqh_last - offsetof(struct type, field.stqe_next)))
+/* Successor link of elm; expands to an lvalue. */
+#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next)
+/* Unlink elm, walking from the head to find its predecessor; the walk does not check for nil, so elm must be queued. */
+#define STAILQ_REMOVE(head, elm, type, field) do { \
+ QMD_SAVELINK(oldnext, (elm)->field.stqe_next); /* expands to nothing here */ \
+ if (STAILQ_FIRST((head)) == (elm)) { \
+ STAILQ_REMOVE_HEAD((head), field); \
+ } \
+ else { \
+ struct type *curelm = STAILQ_FIRST((head)); \
+ while (STAILQ_NEXT(curelm, field) != (elm)) \
+ curelm = STAILQ_NEXT(curelm, field); \
+ STAILQ_REMOVE_AFTER(head, curelm, field); \
+ } \
+ TRASHIT(*oldnext); /* expands to nothing here */ \
+} while (0)
+/* Unlink the element following elm (it must exist); if elm becomes last, the tail link moves to elm. */
+#define STAILQ_REMOVE_AFTER(head, elm, field) do { \
+ if ((STAILQ_NEXT(elm, field) = \
+ STAILQ_NEXT(STAILQ_NEXT(elm, field), field)) == nil) \
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+} while (0)
+/* Pop the first element (queue must be non-empty); an emptied queue gets its tail link reset to the head. */
+#define STAILQ_REMOVE_HEAD(head, field) do { \
+ if ((STAILQ_FIRST((head)) = \
+ STAILQ_NEXT(STAILQ_FIRST((head)), field)) == nil) \
+ (head)->stqh_last = &STAILQ_FIRST((head)); \
+} while (0)
+/* Exchange two queues; an empty result has its tail link re-pointed at its own head rather than the other one's. */
+#define STAILQ_SWAP(head1, head2, type) do { \
+ struct type *swap_first = STAILQ_FIRST(head1); \
+ struct type **swap_last = (head1)->stqh_last; \
+ STAILQ_FIRST(head1) = STAILQ_FIRST(head2); \
+ (head1)->stqh_last = (head2)->stqh_last; \
+ STAILQ_FIRST(head2) = swap_first; \
+ (head2)->stqh_last = swap_last; \
+ if (STAILQ_EMPTY(head1)) \
+ (head1)->stqh_last = &STAILQ_FIRST(head1); \
+ if (STAILQ_EMPTY(head2)) \
+ (head2)->stqh_last = &STAILQ_FIRST(head2); \
+} while (0)
+
+
+/*
+ * List declarations.
+ */
+#define LIST_HEAD(name, type) /* declares struct name: the list head */ \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+/* Static initializer for an empty head; the head argument is unused. */
+#define LIST_HEAD_INITIALIZER(head) \
+ { nil }
+/* Link struct to embed in each element: forward pointer plus the address of the pointer that points at this element. */
+#define LIST_ENTRY(type) \
+struct { \
+ struct type *le_next; /* next element */ \
+ struct type **le_prev; /* address of previous next element */ \
+}
+
+/*
+ * List functions.
+ */
+/* Consistency-check hooks used by the list operations; all three expand to nothing here. */
+#define QMD_LIST_CHECK_HEAD(head, field)
+#define QMD_LIST_CHECK_NEXT(elm, field)
+#define QMD_LIST_CHECK_PREV(elm, field)
+/* True when the list has no elements. */
+#define LIST_EMPTY(head) ((head)->lh_first == nil)
+/* First element, or nil when empty; expands to an lvalue. */
+#define LIST_FIRST(head) ((head)->lh_first)
+/* Forward traversal with var as the cursor; the loop ends when var becomes nil. */
+#define LIST_FOREACH(var, head, field) \
+ for ((var) = LIST_FIRST((head)); \
+ (var) != nil; \
+ (var) = LIST_NEXT((var), field))
+/* As LIST_FOREACH, but starts at var when var is non-nil, otherwise at the first element. */
+#define LIST_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) != nil ? (var) : LIST_FIRST((head))); \
+ (var) != nil; \
+ (var) = LIST_NEXT((var), field))
+/* Traversal that caches the successor in tvar before the body runs, so the body may unlink var. */
+#define LIST_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = LIST_FIRST((head)); \
+ (var) != nil && ((tvar) = LIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+/* The _FROM starting rule combined with the _SAFE successor caching. */
+#define LIST_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) != nil ? (var) : LIST_FIRST((head))); \
+ (var) != nil && ((tvar) = LIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+/* Reset head to the empty list. */
+#define LIST_INIT(head) do { \
+ LIST_FIRST((head)) = nil; \
+} while (0)
+/* Link elm right after listelm, fixing the old successor's back link when there is one. */
+#define LIST_INSERT_AFTER(listelm, elm, field) do { \
+ QMD_LIST_CHECK_NEXT(listelm, field); \
+ if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != nil)\
+ LIST_NEXT((listelm), field)->field.le_prev = \
+ &LIST_NEXT((elm), field); \
+ LIST_NEXT((listelm), field) = (elm); \
+ (elm)->field.le_prev = &LIST_NEXT((listelm), field); \
+} while (0)
+/* Link elm right before listelm by storing it through listelm's back link; no head argument is needed. */
+#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
+ QMD_LIST_CHECK_PREV(listelm, field); \
+ (elm)->field.le_prev = (listelm)->field.le_prev; \
+ LIST_NEXT((elm), field) = (listelm); \
+ *(listelm)->field.le_prev = (elm); \
+ (listelm)->field.le_prev = &LIST_NEXT((elm), field); \
+} while (0)
+/* Insert elm at the front; its back link is the address of the head's lh_first. */
+#define LIST_INSERT_HEAD(head, elm, field) do { \
+ QMD_LIST_CHECK_HEAD((head), field); \
+ if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != nil) \
+ LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\
+ LIST_FIRST((head)) = (elm); \
+ (elm)->field.le_prev = &LIST_FIRST((head)); \
+} while (0)
+/* Successor link of elm; expands to an lvalue. */
+#define LIST_NEXT(elm, field) ((elm)->field.le_next)
+/* Previous element, or nil for the first; le_prev addresses the predecessor's le_next, so step back by its offset (no __containerof). */
+#define LIST_PREV(elm, head, type, field) \
+ ((elm)->field.le_prev == &LIST_FIRST((head)) ? nil : \
+ (struct type *)((char *)(elm)->field.le_prev - offsetof(struct type, field.le_next)))
+/* Unlink elm without needing the head: patch the successor's back link, then store the successor through elm's back link. */
+#define LIST_REMOVE(elm, field) do { \
+ QMD_SAVELINK(oldnext, (elm)->field.le_next); \
+ QMD_SAVELINK(oldprev, (elm)->field.le_prev); \
+ QMD_LIST_CHECK_NEXT(elm, field); \
+ QMD_LIST_CHECK_PREV(elm, field); \
+ if (LIST_NEXT((elm), field) != nil) \
+ LIST_NEXT((elm), field)->field.le_prev = \
+ (elm)->field.le_prev; \
+ *(elm)->field.le_prev = LIST_NEXT((elm), field); \
+ TRASHIT(*oldnext); \
+ TRASHIT(*oldprev); \
+} while (0)
+/* Exchange two lists and re-point each new first element's back link at the head that now owns it. */
+#define LIST_SWAP(head1, head2, type, field) do { \
+ struct type *swap_tmp = LIST_FIRST((head1)); \
+ LIST_FIRST((head1)) = LIST_FIRST((head2)); \
+ LIST_FIRST((head2)) = swap_tmp; \
+ if ((swap_tmp = LIST_FIRST((head1))) != nil) \
+ swap_tmp->field.le_prev = &LIST_FIRST((head1)); \
+ if ((swap_tmp = LIST_FIRST((head2))) != nil) \
+ swap_tmp->field.le_prev = &LIST_FIRST((head2)); \
+} while (0)
+
+/*
+ * Tail queue declarations.
+ */
+#define TAILQ_HEAD(name, type) /* declares struct name: first pointer plus address of the last next link */ \
+struct name { \
+ struct type *tqh_first; /* first element */ \
+ struct type **tqh_last; /* addr of last next element */ \
+ TRACEBUF \
+}
+/* Static initializer for an empty queue: tqh_last points at the head's own tqh_first. */
+#define TAILQ_HEAD_INITIALIZER(head) \
+ { nil, &(head).tqh_first, TRACEBUF_INITIALIZER }
+/* Link struct to embed in each element; its two pointers parallel the head's, which TAILQ_LAST/TAILQ_PREV depend on. */
+#define TAILQ_ENTRY(type) \
+struct { \
+ struct type *tqe_next; /* next element */ \
+ struct type **tqe_prev; /* address of previous next element */ \
+ TRACEBUF \
+}
+
+/*
+ * Tail queue functions.
+ */
+#define QMD_TAILQ_CHECK_HEAD(head, field) /* consistency-check hooks: all four expand to nothing here */
+#define QMD_TAILQ_CHECK_TAIL(head, headname)
+#define QMD_TAILQ_CHECK_NEXT(elm, field)
+#define QMD_TAILQ_CHECK_PREV(elm, field)
+/* Move every element of head2 onto the end of head1; head2 is left empty. No-op when head2 is empty. */
+#define TAILQ_CONCAT(head1, head2, field) do { \
+ if (!TAILQ_EMPTY(head2)) { \
+ *(head1)->tqh_last = (head2)->tqh_first; \
+ (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \
+ (head1)->tqh_last = (head2)->tqh_last; \
+ TAILQ_INIT((head2)); \
+ QMD_TRACE_HEAD(head1); \
+ QMD_TRACE_HEAD(head2); \
+ } \
+} while (0)
+/* True when the queue has no elements. */
+#define TAILQ_EMPTY(head) ((head)->tqh_first == nil)
+/* First element, or nil when empty; expands to an lvalue. */
+#define TAILQ_FIRST(head) ((head)->tqh_first)
+/* Forward traversal with var as the cursor; the loop ends when var becomes nil. */
+#define TAILQ_FOREACH(var, head, field) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var) != nil; \
+ (var) = TAILQ_NEXT((var), field))
+/* As TAILQ_FOREACH, but starts at var when var is non-nil, otherwise at the first element. */
+#define TAILQ_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) != nil ? (var) : TAILQ_FIRST((head))); \
+ (var) != nil; \
+ (var) = TAILQ_NEXT((var), field))
+/* Forward traversal that caches the successor in tvar before the body runs, so the body may unlink var. */
+#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var) != nil && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+/* The _FROM starting rule combined with the _SAFE successor caching. */
+#define TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) != nil ? (var) : TAILQ_FIRST((head))); \
+ (var) != nil && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+/* Backward traversal from the last element; headname is the struct tag given to TAILQ_HEAD. */
+#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \
+ for ((var) = TAILQ_LAST((head), headname); \
+ (var) != nil; \
+ (var) = TAILQ_PREV((var), headname, field))
+/* As TAILQ_FOREACH_REVERSE, but starts at var when var is non-nil, otherwise at the last element. */
+#define TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field) \
+ for ((var) = ((var) != nil ? (var) : TAILQ_LAST((head), headname)); \
+ (var) != nil; \
+ (var) = TAILQ_PREV((var), headname, field))
+/* Backward traversal that caches the predecessor in tvar before the body runs. */
+#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \
+ for ((var) = TAILQ_LAST((head), headname); \
+ (var) != nil && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+/* The reverse _FROM starting rule combined with predecessor caching. */
+#define TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \
+ for ((var) = ((var) != nil ? (var) : TAILQ_LAST((head), headname)); \
+ (var) != nil && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+/* Reset head to the empty queue; tqh_last points back at tqh_first. */
+#define TAILQ_INIT(head) do { \
+ TAILQ_FIRST((head)) = nil; \
+ (head)->tqh_last = &TAILQ_FIRST((head)); \
+ QMD_TRACE_HEAD(head); \
+} while (0)
+/* Link elm right after listelm; if elm ends up last, the head's tail link is moved to elm's next field. */
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ QMD_TAILQ_CHECK_NEXT(listelm, field); \
+ if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != nil)\
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ &TAILQ_NEXT((elm), field); \
+ else { \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_HEAD(head); \
+ } \
+ TAILQ_NEXT((listelm), field) = (elm); \
+ (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+ QMD_TRACE_ELEM(&(listelm)->field); \
+} while (0)
+/* Link elm right before listelm by storing it through listelm's back link; no head argument is needed. */
+#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ QMD_TAILQ_CHECK_PREV(listelm, field); \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ TAILQ_NEXT((elm), field) = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+ QMD_TRACE_ELEM(&(listelm)->field); \
+} while (0)
+/* Insert elm at the front; when the queue was empty elm also becomes the tail. */
+#define TAILQ_INSERT_HEAD(head, elm, field) do { \
+ QMD_TAILQ_CHECK_HEAD(head, field); \
+ if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != nil) \
+ TAILQ_FIRST((head))->field.tqe_prev = \
+ &TAILQ_NEXT((elm), field); \
+ else \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ TAILQ_FIRST((head)) = (elm); \
+ (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \
+ QMD_TRACE_HEAD(head); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+/* Append elm: store it through the tail link, then advance the tail link to elm's next field. */
+#define TAILQ_INSERT_TAIL(head, elm, field) do { \
+ QMD_TAILQ_CHECK_TAIL(head, field); \
+ TAILQ_NEXT((elm), field) = nil; \
+ (elm)->field.tqe_prev = (head)->tqh_last; \
+ *(head)->tqh_last = (elm); \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_HEAD(head); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+/* Last element (nil when empty): tqh_last is viewed as a head struct so its tqh_last slot overlays the last entry's tqe_prev. */
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+/* Successor link of elm; expands to an lvalue. */
+#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+/* Previous element, using the same head-struct overlay as TAILQ_LAST; headname is the TAILQ_HEAD struct tag. */
+#define TAILQ_PREV(elm, headname, field) \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+/* Unlink elm; when elm was last, the head's tail link falls back to elm's back link. */
+#define TAILQ_REMOVE(head, elm, field) do { \
+ QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \
+ QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \
+ QMD_TAILQ_CHECK_NEXT(elm, field); \
+ QMD_TAILQ_CHECK_PREV(elm, field); \
+ if ((TAILQ_NEXT((elm), field)) != nil) \
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else { \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ QMD_TRACE_HEAD(head); \
+ } \
+ *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \
+ TRASHIT(*oldnext); \
+ TRASHIT(*oldprev); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+/* Exchange two queues, then repair each side: the first element's back link, or the tail link when that side is empty. */
+#define TAILQ_SWAP(head1, head2, type, field) do { \
+ struct type *swap_first = (head1)->tqh_first; \
+ struct type **swap_last = (head1)->tqh_last; \
+ (head1)->tqh_first = (head2)->tqh_first; \
+ (head1)->tqh_last = (head2)->tqh_last; \
+ (head2)->tqh_first = swap_first; \
+ (head2)->tqh_last = swap_last; \
+ if ((swap_first = (head1)->tqh_first) != nil) \
+ swap_first->field.tqe_prev = &(head1)->tqh_first; \
+ else \
+ (head1)->tqh_last = &(head1)->tqh_first; \
+ if ((swap_first = (head2)->tqh_first) != nil) \
+ swap_first->field.tqe_prev = &(head2)->tqh_first; \
+ else \
+ (head2)->tqh_last = &(head2)->tqh_first; \
+} while (0)
--- /dev/null
+++ b/sys/src/cmd/ext4srv/include/tree.h
@@ -1,0 +1,796 @@
+/* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */
+/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright 2002 Niels Provos <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure. Every operation
+ * on the tree causes a splay to happen. The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree. On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n). The
+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n).
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute. It fulfills a set of conditions:
+ * - every search path from the root to a leaf consists of the
+ * same number of black nodes,
+ * - each red node (except for the root) has a black parent,
+ * - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type) /* declares struct name: the tree head */ \
+struct name { \
+ struct type *sph_root; /* root of the tree */ \
+}
+/* Static initializer for an empty tree; the root argument is unused. */
+#define SPLAY_INITIALIZER(root) \
+ { nil }
+/* Run-time reset of a tree head to empty. */
+#define SPLAY_INIT(root) do { \
+ (root)->sph_root = nil; \
+} while (/*CONSTCOND*/ 0)
+/* Anonymous link struct to embed in each node as the `field` member. */
+#define SPLAY_ENTRY(type) \
+struct { \
+ struct type *spe_left; /* left element */ \
+ struct type *spe_right; /* right element */ \
+}
+/* Accessors; LEFT, RIGHT and ROOT expand to lvalues and are not wrapped in outer parentheses. */
+#define SPLAY_LEFT(elm, field) (elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right
+#define SPLAY_ROOT(head) (head)->sph_root
+#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == nil)
+/* Building blocks used by the generated name##_SPLAY and name##_SPLAY_MINMAX functions. */
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp holds SPLAY_{RIGHT,LEFT} of the root */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \
+ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \
+ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+ (head)->sph_root = tmp; \
+} while (/*CONSTCOND*/ 0)
+/* Mirror image of SPLAY_ROTATE_RIGHT: tmp (the root's right child) becomes the new root. */
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \
+ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \
+ SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+ (head)->sph_root = tmp; \
+} while (/*CONSTCOND*/ 0)
+/* Hang the current root off tmp's left link, make tmp the old root, and descend to the root's left child. */
+#define SPLAY_LINKLEFT(head, tmp, field) do { \
+ SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+ tmp = (head)->sph_root; \
+ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
+} while (/*CONSTCOND*/ 0)
+/* Mirror image of SPLAY_LINKLEFT: uses the right links and descends to the right child. */
+#define SPLAY_LINKRIGHT(head, tmp, field) do { \
+ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+ tmp = (head)->sph_root; \
+ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
+} while (/*CONSTCOND*/ 0)
+/* Final step: attach the root's subtrees to the ends of the left/right chains, then hang both chains (kept in node) under the root. */
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \
+ SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \
+ SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \
+ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions for one tree type: name is the head struct tag, */
+/* type the node struct tag, field the SPLAY_ENTRY member, cmp a three-way compare of two nodes. */
+#define SPLAY_PROTOTYPE(name, type, field, cmp) \
+void name##_SPLAY(struct name *, struct type *); \
+void name##_SPLAY_MINMAX(struct name *, int); \
+struct type *name##_SPLAY_INSERT(struct name *, struct type *); \
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \
+ /* the four functions above are defined by SPLAY_GENERATE */ \
+/* Finds the node with the same key as elm */ \
+static __inline struct type * \
+name##_SPLAY_FIND(struct name *head, struct type *elm) \
+{ \
+ if (SPLAY_EMPTY(head)) \
+ return(nil); \
+ name##_SPLAY(head, elm); \
+ if ((cmp)(elm, (head)->sph_root) == 0) \
+ return (head->sph_root); \
+ return (nil); \
+} \
+ /* Successor of elm or nil: splays on elm, then takes the leftmost node of elm's right subtree */ \
+static __inline struct type * \
+name##_SPLAY_NEXT(struct name *head, struct type *elm) \
+{ \
+ name##_SPLAY(head, elm); \
+ if (SPLAY_RIGHT(elm, field) != nil) { \
+ elm = SPLAY_RIGHT(elm, field); \
+ while (SPLAY_LEFT(elm, field) != nil) { \
+ elm = SPLAY_LEFT(elm, field); \
+ } \
+ } else \
+ elm = nil; \
+ return (elm); \
+} \
+ /* Splays toward the minimum (val < 0) or maximum (val > 0) and returns the resulting root */ \
+static __inline struct type * \
+name##_SPLAY_MIN_MAX(struct name *head, int val) \
+{ \
+ name##_SPLAY_MINMAX(head, val); \
+ return (SPLAY_ROOT(head)); \
+}
+
+/* SPLAY_GENERATE emits the non-inline tree functions. The main splay operation,
+ * name##_SPLAY, moves a node close to the key of elm to the top of the tree.
+ */
+#define SPLAY_GENERATE(name, type, field, cmp) \
+struct type * \
+name##_SPLAY_INSERT(struct name *head, struct type *elm) /* nil on success, else the existing equal node */ \
+{ \
+ if (SPLAY_EMPTY(head)) { \
+ SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = nil; \
+ } else { \
+ int __comp; \
+ name##_SPLAY(head, elm); \
+ __comp = (cmp)(elm, (head)->sph_root); \
+ if(__comp < 0) { \
+ SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+ SPLAY_RIGHT(elm, field) = (head)->sph_root; \
+ SPLAY_LEFT((head)->sph_root, field) = nil; \
+ } else if (__comp > 0) { \
+ SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+ SPLAY_LEFT(elm, field) = (head)->sph_root; \
+ SPLAY_RIGHT((head)->sph_root, field) = nil; \
+ } else \
+ return ((head)->sph_root); \
+ } \
+ (head)->sph_root = (elm); \
+ return (nil); \
+} \
+ /* Unlinks the root after splaying if it compares equal to elm; returns elm then, otherwise nil */ \
+struct type * \
+name##_SPLAY_REMOVE(struct name *head, struct type *elm) \
+{ \
+ struct type *__tmp; \
+ if (SPLAY_EMPTY(head)) \
+ return (nil); \
+ name##_SPLAY(head, elm); \
+ if ((cmp)(elm, (head)->sph_root) == 0) { \
+ if (SPLAY_LEFT((head)->sph_root, field) == nil) { \
+ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+ } else { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+ name##_SPLAY(head, elm); \
+ SPLAY_RIGHT((head)->sph_root, field) = __tmp; \
+ } \
+ return (elm); \
+ } \
+ return (nil); \
+} \
+ /* Splay on elm's key; the root is handed to cmp unchecked, so the tree must be non-empty */ \
+void \
+name##_SPLAY(struct name *head, struct type *elm) \
+{ \
+ struct type __node, *__left, *__right, *__tmp; \
+ int __comp; \
+\
+ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = nil;\
+ __left = __right = &__node; \
+\
+ while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) { \
+ if (__comp < 0) { \
+ __tmp = SPLAY_LEFT((head)->sph_root, field); \
+ if (__tmp == nil) \
+ break; \
+ if ((cmp)(elm, __tmp) < 0){ \
+ SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+ if (SPLAY_LEFT((head)->sph_root, field) == nil)\
+ break; \
+ } \
+ SPLAY_LINKLEFT(head, __right, field); \
+ } else if (__comp > 0) { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ if (__tmp == nil) \
+ break; \
+ if ((cmp)(elm, __tmp) > 0){ \
+ SPLAY_ROTATE_LEFT(head, __tmp, field); \
+ if (SPLAY_RIGHT((head)->sph_root, field) == nil)\
+ break; \
+ } \
+ SPLAY_LINKRIGHT(head, __left, field); \
+ } \
+ } \
+ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+} \
+ \
+/* Splay with either the minimum or the maximum element \
+ * Used to find minimum or maximum element in tree. \
+ */ \
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{ \
+ struct type __node, *__left, *__right, *__tmp; \
+\
+ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = nil;\
+ __left = __right = &__node; \
+\
+ while (__comp != 0) { /* __comp == 0 selects no direction: leave the tree as is instead of spinning forever */ \
+ if (__comp < 0) { \
+ __tmp = SPLAY_LEFT((head)->sph_root, field); \
+ if (__tmp == nil) \
+ break; \
+ if (__comp < 0){ \
+ SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+ if (SPLAY_LEFT((head)->sph_root, field) == nil)\
+ break; \
+ } \
+ SPLAY_LINKLEFT(head, __right, field); \
+ } else if (__comp > 0) { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ if (__tmp == nil) \
+ break; \
+ if (__comp > 0) { \
+ SPLAY_ROTATE_LEFT(head, __tmp, field); \
+ if (SPLAY_RIGHT((head)->sph_root, field) == nil)\
+ break; \
+ } \
+ SPLAY_LINKRIGHT(head, __left, field); \
+ } \
+ } \
+ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+}
+/* Direction arguments for name##_SPLAY_MIN_MAX: negative selects the minimum, positive the maximum. */
+#define SPLAY_NEGINF -1
+#define SPLAY_INF 1
+/* Front-end macros: name selects the generated function family, x is the tree head, y a node. */
+#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? nil \
+ : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? nil \
+ : name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+/* Walk from SPLAY_MIN via SPLAY_NEXT until nil; both calls splay, so iterating restructures the tree. */
+#define SPLAY_FOREACH(x, name, head) \
+ for ((x) = SPLAY_MIN(name, head); \
+ (x) != nil; \
+ (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type) /* declares struct name: the tree head */ \
+struct name { \
+ struct type *rbh_root; /* root of the tree */ \
+}
+/* Static initializer for an empty tree; the root argument is unused. */
+#define RB_INITIALIZER(root) \
+ { nil }
+/* Run-time reset of a tree head to empty. */
+#define RB_INIT(root) do { \
+ (root)->rbh_root = nil; \
+} while (/*CONSTCOND*/ 0)
+/* Color values stored in rbe_color, and the link struct to embed in each node as the `field` member. */
+#define RB_BLACK 0
+#define RB_RED 1
+#define RB_ENTRY(type) \
+struct { \
+ struct type *rbe_left; /* left element */ \
+ struct type *rbe_right; /* right element */ \
+ struct type *rbe_parent; /* parent element */ \
+ int rbe_color; /* node color */ \
+}
+/* Accessors; all but RB_EMPTY expand to lvalues and are not wrapped in outer parentheses. */
+#define RB_LEFT(elm, field) (elm)->field.rbe_left
+#define RB_RIGHT(elm, field) (elm)->field.rbe_right
+#define RB_PARENT(elm, field) (elm)->field.rbe_parent
+#define RB_COLOR(elm, field) (elm)->field.rbe_color
+#define RB_ROOT(head) (head)->rbh_root
+#define RB_EMPTY(head) (RB_ROOT(head) == nil)
+/* Initialize elm as a red node with no children whose parent link is parent. */
+#define RB_SET(elm, parent, field) do { \
+ RB_PARENT(elm, field) = parent; \
+ RB_LEFT(elm, field) = RB_RIGHT(elm, field) = nil; \
+ RB_COLOR(elm, field) = RB_RED; \
+} while (/*CONSTCOND*/ 0)
+/* Color the node passed as black RB_BLACK and the node passed as red RB_RED. */
+#define RB_SET_BLACKRED(black, red, field) do { \
+ RB_COLOR(black, field) = RB_BLACK; \
+ RB_COLOR(red, field) = RB_RED; \
+} while (/*CONSTCOND*/ 0)
+/* RB_AUGMENT(x) is a hook the rotations call on nodes whose links changed; it is a no-op unless predefined. */
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x) do {} while (0)
+#endif
+
+/*
+ * Left rotation around elm.  tmp is an output scratch variable: it is
+ * set to elm's right child, which takes elm's place in the tree while
+ * elm becomes tmp's left child.  tmp's former left subtree becomes
+ * elm's right subtree.  When elm had no parent, head->rbh_root is
+ * updated.  RB_AUGMENT runs on elm, on tmp and on tmp's parent (if any).
+ * elm must have a right child: tmp is dereferenced unconditionally.
+ */
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \
+ (tmp) = RB_RIGHT(elm, field); \
+ if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != nil) { \
+ RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \
+ } \
+ RB_AUGMENT(elm); \
+ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != nil) { \
+ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+ else \
+ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+ } else \
+ (head)->rbh_root = (tmp); \
+ RB_LEFT(tmp, field) = (elm); \
+ RB_PARENT(elm, field) = (tmp); \
+ RB_AUGMENT(tmp); \
+ if ((RB_PARENT(tmp, field))) \
+ RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (/*CONSTCOND*/ 0)
+
+/*
+ * Right rotation around elm; mirror image of RB_ROTATE_LEFT.  tmp is
+ * set to elm's left child, which takes elm's place while elm becomes
+ * tmp's right child.  tmp's former right subtree becomes elm's left
+ * subtree.  When elm had no parent, head->rbh_root is updated.
+ * RB_AUGMENT runs on elm, on tmp and on tmp's parent (if any).
+ * elm must have a left child: tmp is dereferenced unconditionally.
+ */
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \
+ (tmp) = RB_LEFT(elm, field); \
+ if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != nil) { \
+ RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \
+ } \
+ RB_AUGMENT(elm); \
+ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != nil) { \
+ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+ else \
+ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+ } else \
+ (head)->rbh_root = (tmp); \
+ RB_RIGHT(tmp, field) = (elm); \
+ RB_PARENT(elm, field) = (tmp); \
+ RB_AUGMENT(tmp); \
+ if ((RB_PARENT(tmp, field))) \
+ RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+/*
+ * RB_PROTOTYPE declares the nine per-tree functions with external
+ * linkage; RB_PROTOTYPE_STATIC declares them `static`.  The field and
+ * cmp arguments are accepted but not used by the declarations.
+ */
+#define RB_PROTOTYPE(name, type, field, cmp) \
+ RB_PROTOTYPE_INTERNAL(name, type, field, cmp,)
+#define RB_PROTOTYPE_STATIC(name, type, field, cmp) \
+ RB_PROTOTYPE_INTERNAL(name, type, field, cmp, static)
+/* attr is placed in front of every declaration (empty or `static`). */
+#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \
+ RB_PROTOTYPE_INSERT_COLOR(name, type, attr); \
+ RB_PROTOTYPE_REMOVE_COLOR(name, type, attr); \
+ RB_PROTOTYPE_INSERT(name, type, attr); \
+ RB_PROTOTYPE_REMOVE(name, type, attr); \
+ RB_PROTOTYPE_FIND(name, type, attr); \
+ RB_PROTOTYPE_NFIND(name, type, attr); \
+ RB_PROTOTYPE_NEXT(name, type, attr); \
+ RB_PROTOTYPE_PREV(name, type, attr); \
+ RB_PROTOTYPE_MINMAX(name, type, attr);
+/* One declaration macro per generated function; none ends in a semicolon. */
+#define RB_PROTOTYPE_INSERT_COLOR(name, type, attr) \
+ attr void name##_RB_INSERT_COLOR(struct name *, struct type *)
+#define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr) \
+ attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *)
+#define RB_PROTOTYPE_REMOVE(name, type, attr) \
+ attr struct type *name##_RB_REMOVE(struct name *, struct type *)
+#define RB_PROTOTYPE_INSERT(name, type, attr) \
+ attr struct type *name##_RB_INSERT(struct name *, struct type *)
+#define RB_PROTOTYPE_FIND(name, type, attr) \
+ attr struct type *name##_RB_FIND(struct name *, struct type *)
+#define RB_PROTOTYPE_NFIND(name, type, attr) \
+ attr struct type *name##_RB_NFIND(struct name *, struct type *)
+#define RB_PROTOTYPE_NEXT(name, type, attr) \
+ attr struct type *name##_RB_NEXT(struct type *)
+#define RB_PROTOTYPE_PREV(name, type, attr) \
+ attr struct type *name##_RB_PREV(struct type *)
+#define RB_PROTOTYPE_MINMAX(name, type, attr) \
+ attr struct type *name##_RB_MINMAX(struct name *, int)
+
+/*
+ * Function generators.
+ * RB_GENERATE emits the definitions of all nine per-tree functions
+ * (insert/remove with their recolouring helpers, find, nfind, next,
+ * prev and minmax) with external linkage; RB_GENERATE_STATIC emits
+ * them as `static`.  cmp is the comparison used by insert, find and
+ * nfind; it is called as cmp(a, b) and its result is tested against 0.
+ */
+#define RB_GENERATE(name, type, field, cmp) \
+ RB_GENERATE_INTERNAL(name, type, field, cmp,)
+#define RB_GENERATE_STATIC(name, type, field, cmp) \
+ RB_GENERATE_INTERNAL(name, type, field, cmp, static)
+#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \
+ RB_GENERATE_INSERT_COLOR(name, type, field, attr) \
+ RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \
+ RB_GENERATE_INSERT(name, type, field, cmp, attr) \
+ RB_GENERATE_REMOVE(name, type, field, attr) \
+ RB_GENERATE_FIND(name, type, field, cmp, attr) \
+ RB_GENERATE_NFIND(name, type, field, cmp, attr) \
+ RB_GENERATE_NEXT(name, type, field, attr) \
+ RB_GENERATE_PREV(name, type, field, attr) \
+ RB_GENERATE_MINMAX(name, type, field, attr)
+
+/*
+ * Emits name##_RB_INSERT_COLOR(head, elm): repairs the colouring after
+ * elm has been linked into the tree as a red node (see RB_SET).
+ * The loop runs while elm's parent is red.  tmp is the uncle (the
+ * grandparent's other child): if it is red, parent and uncle are
+ * blackened, the grandparent reddened and the loop continues from the
+ * grandparent; otherwise one or two rotations finish the repair.
+ * The root is coloured black on exit.
+ */
+#define RB_GENERATE_INSERT_COLOR(name, type, field, attr) \
+attr void \
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \
+{ \
+ struct type *parent, *gparent, *tmp; \
+ while ((parent = RB_PARENT(elm, field)) != nil && \
+ RB_COLOR(parent, field) == RB_RED) { \
+ gparent = RB_PARENT(parent, field); \
+ if (parent == RB_LEFT(gparent, field)) { \
+ tmp = RB_RIGHT(gparent, field); \
+ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+ RB_COLOR(tmp, field) = RB_BLACK; \
+ RB_SET_BLACKRED(parent, gparent, field);\
+ elm = gparent; \
+ continue; \
+ } \
+ /* elm is an inner grandchild: rotate it outward, then swap roles */ \
+ if (RB_RIGHT(parent, field) == elm) { \
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ tmp = parent; \
+ parent = elm; \
+ elm = tmp; \
+ } \
+ RB_SET_BLACKRED(parent, gparent, field); \
+ RB_ROTATE_RIGHT(head, gparent, tmp, field); \
+ } else { \
+ /* mirror image: parent is the right child of gparent */ \
+ tmp = RB_LEFT(gparent, field); \
+ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+ RB_COLOR(tmp, field) = RB_BLACK; \
+ RB_SET_BLACKRED(parent, gparent, field);\
+ elm = gparent; \
+ continue; \
+ } \
+ if (RB_LEFT(parent, field) == elm) { \
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ tmp = parent; \
+ parent = elm; \
+ elm = tmp; \
+ } \
+ RB_SET_BLACKRED(parent, gparent, field); \
+ RB_ROTATE_LEFT(head, gparent, tmp, field); \
+ } \
+ } \
+ RB_COLOR(head->rbh_root, field) = RB_BLACK; \
+}
+
+/*
+ * Emits name##_RB_REMOVE_COLOR(head, parent, elm): repairs the
+ * colouring after a black node has been unlinked (name##_RB_REMOVE
+ * calls it only in that case).  elm is the node now occupying the
+ * removed position and may be nil; parent is its parent.
+ * The loop runs while elm is nil or black, is not the root and parent
+ * is non-nil.  tmp is elm's sibling; it is dereferenced without a nil
+ * check.  On exit elm, if non-nil, is coloured black.
+ */
+#define RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \
+attr void \
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{ \
+ struct type *tmp; \
+ while ((elm == nil || RB_COLOR(elm, field) == RB_BLACK) && \
+ elm != RB_ROOT(head) && parent != nil) { \
+ if (RB_LEFT(parent, field) == elm) { \
+ tmp = RB_RIGHT(parent, field); \
+ /* red sibling: rotate so that the sibling becomes black */ \
+ if (RB_COLOR(tmp, field) == RB_RED) { \
+ RB_SET_BLACKRED(tmp, parent, field); \
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ tmp = RB_RIGHT(parent, field); \
+ } \
+ /* both of the sibling's children black (or absent): recolour and move up */ \
+ if ((RB_LEFT(tmp, field) == nil || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+ (RB_RIGHT(tmp, field) == nil || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+ RB_COLOR(tmp, field) = RB_RED; \
+ elm = parent; \
+ parent = RB_PARENT(elm, field); \
+ } else { \
+ /* make sure the sibling's far (right) child is red */ \
+ if (RB_RIGHT(tmp, field) == nil || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+ struct type *oleft; \
+ if ((oleft = RB_LEFT(tmp, field)) \
+ != nil) \
+ RB_COLOR(oleft, field) = RB_BLACK;\
+ RB_COLOR(tmp, field) = RB_RED; \
+ RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+ tmp = RB_RIGHT(parent, field); \
+ } \
+ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+ RB_COLOR(parent, field) = RB_BLACK; \
+ if (RB_RIGHT(tmp, field)) \
+ RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ elm = RB_ROOT(head); \
+ break; \
+ } \
+ } else { \
+ /* mirror image: elm is the right child of parent */ \
+ tmp = RB_LEFT(parent, field); \
+ if (RB_COLOR(tmp, field) == RB_RED) { \
+ RB_SET_BLACKRED(tmp, parent, field); \
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ tmp = RB_LEFT(parent, field); \
+ } \
+ if ((RB_LEFT(tmp, field) == nil || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+ (RB_RIGHT(tmp, field) == nil || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+ RB_COLOR(tmp, field) = RB_RED; \
+ elm = parent; \
+ parent = RB_PARENT(elm, field); \
+ } else { \
+ if (RB_LEFT(tmp, field) == nil || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+ struct type *oright; \
+ if ((oright = RB_RIGHT(tmp, field)) \
+ != nil) \
+ RB_COLOR(oright, field) = RB_BLACK;\
+ RB_COLOR(tmp, field) = RB_RED; \
+ RB_ROTATE_LEFT(head, tmp, oright, field);\
+ tmp = RB_LEFT(parent, field); \
+ } \
+ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+ RB_COLOR(parent, field) = RB_BLACK; \
+ if (RB_LEFT(tmp, field)) \
+ RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ elm = RB_ROOT(head); \
+ break; \
+ } \
+ } \
+ } \
+ if (elm) \
+ RB_COLOR(elm, field) = RB_BLACK; \
+}
+
+/*
+ * Emits name##_RB_REMOVE(head, elm): unlinks elm from the tree and
+ * returns it.  elm must be a member of the tree; no search is done.
+ * With at most one child, that child (possibly nil) replaces elm.
+ * With two children, the leftmost node of the right subtree is first
+ * detached and then takes over elm's position by copying elm's whole
+ * linkage field (children, parent and colour).
+ * `color` is the colour of the node that was physically detached; when
+ * it was black, name##_RB_REMOVE_COLOR rebalances starting at `child`.
+ * The removed node's own linkage fields are left as they were.
+ */
+#define RB_GENERATE_REMOVE(name, type, field, attr) \
+attr struct type * \
+name##_RB_REMOVE(struct name *head, struct type *elm) \
+{ \
+ struct type *child, *parent, *old = elm; \
+ int color; \
+ if (RB_LEFT(elm, field) == nil) \
+ child = RB_RIGHT(elm, field); \
+ else if (RB_RIGHT(elm, field) == nil) \
+ child = RB_LEFT(elm, field); \
+ else { \
+ struct type *left; \
+ /* from here on elm is the replacement node, old the one being removed */ \
+ elm = RB_RIGHT(elm, field); \
+ while ((left = RB_LEFT(elm, field)) != nil) \
+ elm = left; \
+ child = RB_RIGHT(elm, field); \
+ parent = RB_PARENT(elm, field); \
+ color = RB_COLOR(elm, field); \
+ if (child) \
+ RB_PARENT(child, field) = parent; \
+ if (parent) { \
+ if (RB_LEFT(parent, field) == elm) \
+ RB_LEFT(parent, field) = child; \
+ else \
+ RB_RIGHT(parent, field) = child; \
+ RB_AUGMENT(parent); \
+ } else \
+ RB_ROOT(head) = child; \
+ /* replacement was old's direct child: it becomes child's parent */ \
+ if (RB_PARENT(elm, field) == old) \
+ parent = elm; \
+ (elm)->field = (old)->field; \
+ if (RB_PARENT(old, field)) { \
+ if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+ RB_LEFT(RB_PARENT(old, field), field) = elm;\
+ else \
+ RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+ RB_AUGMENT(RB_PARENT(old, field)); \
+ } else \
+ RB_ROOT(head) = elm; \
+ RB_PARENT(RB_LEFT(old, field), field) = elm; \
+ if (RB_RIGHT(old, field)) \
+ RB_PARENT(RB_RIGHT(old, field), field) = elm; \
+ /* re-run the augment hook on every ancestor of the detach point */ \
+ if (parent) { \
+ left = parent; \
+ do { \
+ RB_AUGMENT(left); \
+ } while ((left = RB_PARENT(left, field)) != nil); \
+ } \
+ goto color; \
+ } \
+ parent = RB_PARENT(elm, field); \
+ color = RB_COLOR(elm, field); \
+ if (child) \
+ RB_PARENT(child, field) = parent; \
+ if (parent) { \
+ if (RB_LEFT(parent, field) == elm) \
+ RB_LEFT(parent, field) = child; \
+ else \
+ RB_RIGHT(parent, field) = child; \
+ RB_AUGMENT(parent); \
+ } else \
+ RB_ROOT(head) = child; \
+color: \
+ if (color == RB_BLACK) \
+ name##_RB_REMOVE_COLOR(head, parent, child); \
+ return (old); \
+} \
+
+#define RB_GENERATE_INSERT(name, type, field, cmp, attr) \
+/* \
+ * Links elm into the tree.  Returns nil on success; when a node that \
+ * compares equal is already present, that node is returned and the \
+ * tree is left untouched. \
+ */ \
+attr struct type * \
+name##_RB_INSERT(struct name *head, struct type *elm) \
+{ \
+	struct type *cur, *above; \
+	int order; \
+	above = nil; \
+	order = 0; \
+	/* walk down to the empty slot where elm belongs */ \
+	for (cur = RB_ROOT(head); cur != nil; ) { \
+		above = cur; \
+		order = (cmp)(elm, above); \
+		if (order == 0) \
+			return (cur); \
+		if (order < 0) \
+			cur = RB_LEFT(cur, field); \
+		else \
+			cur = RB_RIGHT(cur, field); \
+	} \
+	RB_SET(elm, above, field); \
+	if (above == nil) \
+		RB_ROOT(head) = elm; \
+	else { \
+		if (order < 0) \
+			RB_LEFT(above, field) = elm; \
+		else \
+			RB_RIGHT(above, field) = elm; \
+		RB_AUGMENT(above); \
+	} \
+	name##_RB_INSERT_COLOR(head, elm); \
+	return (nil); \
+}
+
+#define RB_GENERATE_FIND(name, type, field, cmp, attr) \
+/* Returns the node comparing equal to elm, or nil when there is none */ \
+attr struct type * \
+name##_RB_FIND(struct name *head, struct type *elm) \
+{ \
+	struct type *cur; \
+	int order; \
+	for (cur = RB_ROOT(head); cur != nil; ) { \
+		order = cmp(elm, cur); \
+		if (order == 0) \
+			break; \
+		if (order < 0) \
+			cur = RB_LEFT(cur, field); \
+		else \
+			cur = RB_RIGHT(cur, field); \
+	} \
+	/* cur is the match, or nil after falling off the tree */ \
+	return (cur); \
+}
+
+#define RB_GENERATE_NFIND(name, type, field, cmp, attr) \
+/* \
+ * Returns the smallest node that is not less than elm: an exact match \
+ * when one exists, otherwise the last node at which the search turned \
+ * left; nil when every node is smaller than elm. \
+ */ \
+attr struct type * \
+name##_RB_NFIND(struct name *head, struct type *elm) \
+{ \
+	struct type *cur, *best; \
+	int order; \
+	best = nil; \
+	for (cur = RB_ROOT(head); cur != nil; ) { \
+		order = cmp(elm, cur); \
+		if (order == 0) \
+			return (cur); \
+		if (order > 0) \
+			cur = RB_RIGHT(cur, field); \
+		else { \
+			/* cur is greater than elm: remember it, look for a closer one */ \
+			best = cur; \
+			cur = RB_LEFT(cur, field); \
+		} \
+	} \
+	return (best); \
+}
+
+#define RB_GENERATE_NEXT(name, type, field, attr) \
+/* ARGSUSED */ \
+/* Returns the in-order successor of elm, or nil when elm is the last node */ \
+attr struct type * \
+name##_RB_NEXT(struct type *elm) \
+{ \
+	struct type *up; \
+	if (RB_RIGHT(elm, field) != nil) { \
+		/* successor is the leftmost node of the right subtree */ \
+		for (elm = RB_RIGHT(elm, field); RB_LEFT(elm, field) != nil; ) \
+			elm = RB_LEFT(elm, field); \
+		return (elm); \
+	} \
+	/* climb while arriving from a right child; the parent reached next is the answer */ \
+	up = RB_PARENT(elm, field); \
+	while (up != nil && elm == RB_RIGHT(up, field)) { \
+		elm = up; \
+		up = RB_PARENT(elm, field); \
+	} \
+	return (up); \
+}
+
+#define RB_GENERATE_PREV(name, type, field, attr) \
+/* ARGSUSED */ \
+/* Returns the in-order predecessor of elm, or nil when elm is the first node */ \
+attr struct type * \
+name##_RB_PREV(struct type *elm) \
+{ \
+	struct type *up; \
+	if (RB_LEFT(elm, field) != nil) { \
+		/* predecessor is the rightmost node of the left subtree */ \
+		for (elm = RB_LEFT(elm, field); RB_RIGHT(elm, field) != nil; ) \
+			elm = RB_RIGHT(elm, field); \
+		return (elm); \
+	} \
+	/* climb while arriving from a left child; the parent reached next is the answer */ \
+	up = RB_PARENT(elm, field); \
+	while (up != nil && elm == RB_LEFT(up, field)) { \
+		elm = up; \
+		up = RB_PARENT(elm, field); \
+	} \
+	return (up); \
+}
+
+#define RB_GENERATE_MINMAX(name, type, field, attr) \
+/* \
+ * Returns the leftmost node when val is negative, the rightmost node \
+ * otherwise; nil for an empty tree. \
+ */ \
+attr struct type * \
+name##_RB_MINMAX(struct name *head, int val) \
+{ \
+	struct type *cur, *last; \
+	last = nil; \
+	cur = RB_ROOT(head); \
+	while (cur != nil) { \
+		last = cur; \
+		if (val < 0) \
+			cur = RB_LEFT(cur, field); \
+		else \
+			cur = RB_RIGHT(cur, field); \
+	} \
+	return (last); \
+}
+
+/* Direction selectors passed to name##_RB_MINMAX (negative = minimum). */
+#define RB_NEGINF -1
+#define RB_INF 1
+
+/*
+ * User-facing wrappers around the generated per-tree functions.
+ * x is the tree head and y the element.  RB_NEXT and RB_PREV accept
+ * the head for symmetry but do not use it.
+ */
+#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
+#define RB_NFIND(name, x, y) name##_RB_NFIND(x, y)
+#define RB_NEXT(name, x, y) name##_RB_NEXT(y)
+#define RB_PREV(name, x, y) name##_RB_PREV(y)
+#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
+
+/* Visit every node in ascending order; x is the loop variable. */
+#define RB_FOREACH(x, name, head) \
+ for ((x) = RB_MIN(name, head); \
+ (x) != nil; \
+ (x) = name##_RB_NEXT(x))
+
+/*
+ * Ascending iteration starting at node y.  y is overwritten: it holds
+ * the successor of x, fetched before the loop body runs.
+ */
+#define RB_FOREACH_FROM(x, name, y) \
+ for ((x) = (y); \
+ ((x) != nil) && ((y) = name##_RB_NEXT(x), (x) != nil); \
+ (x) = (y))
+
+/*
+ * Ascending iteration over the whole tree with the successor fetched
+ * into y before the loop body runs (presumably so that the body may
+ * unlink x without breaking the traversal).
+ */
+#define RB_FOREACH_SAFE(x, name, head, y) \
+ for ((x) = RB_MIN(name, head); \
+ ((x) != nil) && ((y) = name##_RB_NEXT(x), (x) != nil); \
+ (x) = (y))
+
+/* Visit every node in descending order; x is the loop variable. */
+#define RB_FOREACH_REVERSE(x, name, head) \
+ for ((x) = RB_MAX(name, head); \
+ (x) != nil; \
+ (x) = name##_RB_PREV(x))
+
+/*
+ * Descending iteration starting at node y.  y is overwritten: it holds
+ * the predecessor of x, fetched before the loop body runs.
+ */
+#define RB_FOREACH_REVERSE_FROM(x, name, y) \
+ for ((x) = (y); \
+ ((x) != nil) && ((y) = name##_RB_PREV(x), (x) != nil); \
+ (x) = (y))
+
+/*
+ * Descending iteration over the whole tree with the predecessor
+ * fetched into y before the loop body runs.
+ */
+#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \
+ for ((x) = RB_MAX(name, head); \
+ ((x) != nil) && ((y) = name##_RB_PREV(x), (x) != nil); \
+ (x) = (y))
--- /dev/null
+++ b/sys/src/cmd/ext4srv/mkfile
@@ -1,0 +1,61 @@
+# Build description for ext4srv; the generic single-program rules come
+# from /sys/src/cmd/mkone, included at the bottom.
+</$objtype/mkfile
+
+TARG=ext4srv
+# -D__${objtype}__ defines a macro naming the target architecture,
+# -Iinclude adds the local include directory to the header search path.
+# NOTE(review): -p presumably selects the ANSI preprocessor (tree.h
+# relies on ## token pasting) -- confirm against the compiler manual.
+CFLAGS=$CFLAGS -D__${objtype}__ -p -Iinclude
+
+# One object per source file in this directory.
+OFILES=\
+ ext4.$O\
+ ext4_balloc.$O\
+ ext4_bcache.$O\
+ ext4_bitmap.$O\
+ ext4_block_group.$O\
+ ext4_blockdev.$O\
+ ext4_crc32.$O\
+ ext4_debug.$O\
+ ext4_dir.$O\
+ ext4_dir_idx.$O\
+ ext4_extent.$O\
+ ext4_fs.$O\
+ ext4_hash.$O\
+ ext4_ialloc.$O\
+ ext4_inode.$O\
+ ext4_journal.$O\
+ ext4_mbr.$O\
+ ext4_mkfs.$O\
+ ext4_super.$O\
+ ext4_trans.$O\
+ ext4srv.$O\
+ group.$O\
+ part.$O\
+
+# Headers listed as prerequisites of the objects.
+HFILES=\
+ common.h\
+ group.h\
+ include/ext4.h\
+ include/ext4_balloc.h\
+ include/ext4_bcache.h\
+ include/ext4_bitmap.h\
+ include/ext4_block_group.h\
+ include/ext4_blockdev.h\
+ include/ext4_config.h\
+ include/ext4_crc32.h\
+ include/ext4_debug.h\
+ include/ext4_dir.h\
+ include/ext4_dir_idx.h\
+ include/ext4_extent.h\
+ include/ext4_fs.h\
+ include/ext4_hash.h\
+ include/ext4_ialloc.h\
+ include/ext4_inode.h\
+ include/ext4_journal.h\
+ include/ext4_mbr.h\
+ include/ext4_misc.h\
+ include/ext4_mkfs.h\
+ include/ext4_super.h\
+ include/ext4_trans.h\
+ include/ext4_types.h\
+ include/queue.h\
+ include/tree.h\
+
+# Installation directory for the finished binary.
+BIN=/$objtype/bin
+</sys/src/cmd/mkone
--- /dev/null
+++ b/sys/src/cmd/ext4srv/part.c
@@ -1,0 +1,454 @@
+#include "ext4_config.h"
+#include "ext4.h"
+#include <thread.h>
+#include "ext4_mkfs.h"
+#include "group.h"
+#include "common.h"
+
+/*
+ * Debug tracing hook.  The fprint call is commented out, so every
+ * TRACE(...) currently expands to nothing.
+ */
+#define TRACE(fmt, ...) //fprint(2, fmt, __VA_ARGS__)
+
+/*
+ * Fetch the user pointer stored in a block device's interface;
+ * openpart stores the owning Part there.
+ */
+#define BDEV2PART(bdev) ((bdev)->bdif->p_user)
+
+/* File-wide registry of open partitions. */
+static struct {
+ QLock; /* taken with qlock(&sv) around every use of the fields below */
+ Part *ps; /* head of the list of open partitions, linked via next/prev */
+ u32int id; /* running counter; openpart places it in the upper 32 bits of qidmask.path */
+}sv;
+
+/*
+ * Read n bytes from f at offset into av, calling pread repeatedly
+ * until the request is filled or pread returns 0 or an error.
+ * Returns the number of bytes gathered; when nothing at all was read
+ * the result of the failing pread (0 or negative) is returned instead.
+ */
+static long
+preadn(int f, void *av, long n, vlong offset)
+{
+	char *dst;
+	long got, total;
+
+	assert(offset >= 0);
+
+	dst = av;
+	for(total = 0; total < n; total += got){
+		got = pread(f, dst+total, n-total, offset);
+		if(got <= 0)
+			return total == 0 ? got : total;
+		offset += got;
+	}
+	return total;
+}
+
+/*
+ * Block device "open" callback installed by openpart.  Nothing needs
+ * to be done here (openpart has already opened the file), so it only
+ * traces the call and reports success.
+ */
+static int
+bdopen(struct ext4_blockdev *bdev)
+{
+ Part *p;
+
+ p = BDEV2PART(bdev);
+ TRACE("bdopen %p\n", p);
+ USED(p); /* p is otherwise unused while TRACE expands to nothing */
+
+ return 0;
+}
+
+/*
+ * Block device "read" callback: read blkcnt physical blocks starting
+ * at block blkid into buf.  Returns 0 when every requested byte was
+ * read, -1 otherwise (short read or error).
+ * NOTE(review): the byte count blkcnt*ph_bsize is computed in the
+ * operands' own width -- confirm it cannot overflow for large requests.
+ */
+static int
+bdread(struct ext4_blockdev *bdev, void *buf, u64int blkid, u32int blkcnt)
+{
+	Part *part;
+
+	part = BDEV2PART(bdev);
+	TRACE("bdread %p %p %llud %ud\n", part, buf, blkid, blkcnt);
+	if(preadn(part->f, buf, blkcnt*part->bdif.ph_bsize, blkid*part->bdif.ph_bsize) == blkcnt*part->bdif.ph_bsize)
+		return 0;
+
+	return -1;
+}
+
+/*
+ * Block device "write" callback: write blkcnt physical blocks from buf
+ * starting at block blkid, using a single pwrite.  Returns 0 when the
+ * whole request was written, -1 otherwise.
+ */
+static int
+bdwrite(struct ext4_blockdev *bdev, const void *buf, u64int blkid, u32int blkcnt)
+{
+	Part *part;
+
+	part = BDEV2PART(bdev);
+	TRACE("bdwrite %p %p %llud %ud\n", part, buf, blkid, blkcnt);
+	if(pwrite(part->f, buf, blkcnt*part->bdif.ph_bsize, blkid*part->bdif.ph_bsize) == blkcnt*part->bdif.ph_bsize)
+		return 0;
+
+	return -1;
+}
+
+/*
+ * Block device "close" callback installed by openpart.  The file
+ * descriptor is closed by _closepart, not here, so this only traces
+ * the call and reports success.
+ */
+static int
+bdclose(struct ext4_blockdev *bdev)
+{
+ Part *p;
+
+ p = BDEV2PART(bdev);
+ TRACE("bdclose %p\n", p);
+ USED(p); /* p is otherwise unused while TRACE expands to nothing */
+
+ return 0;
+}
+
+/*
+ * Find the physical block size of dev by looking for a "geometry" line
+ * in the file named ctl that sits in the same directory as dev.
+ * *blksz is preset to 512 and keeps that value when the ctl file
+ * cannot be opened or contains no geometry line.
+ * Returns 0 on success.  Returns -1 with the error string set when
+ * memory cannot be allocated or when the third field of the geometry
+ * line is not a positive number that fits in *blksz.
+ */
+static int
+getblksz(char *dev, u32int *blksz)
+{
+	char *s, *e, *g, *a[5];
+	vlong x;
+	int f, n, r;
+
+	/* default blksz if couldn't find out the real one */
+	*blksz = 512;
+
+	f = -1;
+	g = nil;
+	/* the "_ctl" suffix only reserves room for the "/ctl" written below */
+	if((s = smprint("%s_ctl", dev)) == nil)
+		goto error;
+	cleanname(s);
+	if((e = strrchr(s, '/')) == nil)
+		e = s;
+	strcpy(e, "/ctl");
+	f = open(s, OREAD);
+	free(s);
+	if(f >= 0){
+		if((g = malloc(4096)) == nil)
+			goto error;
+		/* slurp at most 4095 bytes, leaving room for the terminator */
+		for(n = 0; (r = read(f, g+n, 4096-n-1)) > 0; n += r);
+		g[n] = 0;
+		close(f);
+		f = -1;
+
+		for(s = g; (e = strchr(s, '\n')) != nil; s = e+1){
+			*e = 0;
+			if(tokenize(s, a, nelem(a)) >= 3 && strcmp(a[0], "geometry") == 0){
+				x = strtoll(a[2], &e, 0);
+				if(x > 0 && *e == 0)
+					*blksz = x;
+				/* also catches values truncated by the assignment above */
+				if(*blksz != x){
+					werrstr("invalid block size: %s", a[2]);
+					goto error;
+				}
+				break;
+			}
+		}
+	}
+
+	/* f is never open here: it was either not opened or already closed */
+	free(g);
+	return 0;
+error:
+	/*
+	 * Close only a descriptor that is really open: a failing close(-1)
+	 * would replace the error string set above.
+	 */
+	if(f >= 0)
+		close(f);
+	free(g);
+	return -1;
+}
+
+/*
+ * Print verb handler installed by openpart for both 'M' and 'Ð'.
+ * Takes a Part* argument and prints its qid.path in hexadecimal:
+ * prefixed with "/" for 'M', with "dev" for any other verb.
+ */
+static int
+fmtpart(Fmt *f)
+{
+	Part *part;
+	char *layout;
+
+	part = va_arg(f->args, Part*);
+	if(f->r == 'M')
+		layout = "/%#llux";
+	else
+		layout = "dev%#llux";
+
+	return fmtprint(f, layout, part->qid.path);
+}
+
+/*
+ * Read the whole file at path (leading slashes are ignored) from the
+ * ext4 file system mounted for p.
+ * Returns a malloc'ed buffer the caller must free, and stores the
+ * number of bytes read in *sz.  One extra byte is allocated and set to
+ * zero, so the result can be used as a string.
+ * Returns nil with *sz set to 0 when the path cannot be built, the
+ * file cannot be opened, memory runs out or a read fails.
+ */
+static void *
+readfile(Part *p, char *path, usize *sz)
+{
+	usize n, got, total;
+	char *s, *d;
+	ext4_file f;
+	int r;
+
+	*sz = 0;
+	while(*path == '/')
+		path++;
+	/* smprint returns nil when out of memory; never hand that to ext4_fopen2 */
+	if((s = smprint("%M/%s", p, path)) == nil)
+		return nil;
+	r = ext4_fopen2(&f, s, O_RDONLY);
+	free(s);
+	if(r != 0)
+		return nil;
+
+	total = ext4_fsize(&f);
+	if((d = malloc(total+1)) == nil){
+		ext4_fclose(&f);
+		return nil;
+	}
+
+	for(n = 0; n < total; n += got){
+		if(ext4_fread(&f, d+n, total-n, &got) < 0){
+			werrstr("readfile: %r");
+			ext4_fclose(&f);
+			free(d);
+			return nil;
+		}
+		/* the file ended before the size reported by ext4_fsize */
+		if(got == 0)
+			break;
+	}
+	ext4_fclose(&f);
+
+	d[n] = 0;
+	*sz = n;
+	return d;
+}
+
+/*
+ * Register p's block device with the ext4 library, mount it, install
+ * the lock callbacks, replay and start the journal, optionally enable
+ * cache write-back, fetch the superblock and load the group table
+ * (from opts->group when given, otherwise from /etc/group on the
+ * mounted file system, if it can be read).
+ * Returns 0 on success.  On failure returns -1 with the error string
+ * prefixed by "mountpart: ", after undoing every step that had
+ * succeeded: the caller frees p, which contains the registered bdev,
+ * so the library must not be left holding on to it.
+ * NOTE(review): whether loadgroups leaves p->groups partly filled on
+ * failure cannot be seen from here -- confirm no freegroups is needed.
+ */
+static int
+mountpart(Part *p, Opts *opts)
+{
+	char err[ERRMAX];
+	usize sz;
+	char *gr;
+	int r, registered, mounted, journaled, writeback;
+
+	r = 0;
+	registered = mounted = journaled = writeback = 0;
+	if(snprint(p->dev, sizeof(p->dev), "%Ð", p) >= sizeof(p->dev)){
+		werrstr("part path too long");
+		goto error;
+	}
+	if(snprint(p->mnt, sizeof(p->mnt), "%M/", p) >= sizeof(p->mnt)){
+		werrstr("part path too long");
+		goto error;
+	}
+	if(ext4_device_register(&p->bdev, p->dev) < 0){
+		werrstr("register: %r");
+		goto error;
+	}
+	registered = 1;
+	if(ext4_mount(p->dev, p->mnt, opts->rdonly) < 0){
+		werrstr("mount: %r");
+		goto error;
+	}
+	mounted = 1;
+	if(ext4_mount_setup_locks(p->mnt, &p->oslocks) < 0){
+		werrstr("locks: %r");
+		goto error;
+	}
+	if(ext4_recover(p->mnt) < 0){
+		werrstr("recover: %r");
+		goto error;
+	}
+	if(ext4_journal_start(p->mnt) < 0){
+		werrstr("journal: %r");
+		goto error;
+	}
+	journaled = 1;
+	if(opts->cachewb){
+		ext4_cache_write_back(p->mnt, 1);
+		writeback = 1;
+	}
+
+	if(ext4_get_sblock(p->mnt, &p->sb) < 0){
+		werrstr("sblock: %r");
+		goto error;
+	}
+
+	if(opts->group != nil){
+		r = loadgroups(&p->groups, opts->group);
+	}else if((gr = readfile(p, "/etc/group", &sz)) != nil){
+		gr[sz] = 0;
+		r = loadgroups(&p->groups, gr);
+		free(gr);
+	}
+	if(r != 0)
+		goto error;
+
+	return 0;
+error:
+	/* remember the cause: the teardown calls below may overwrite it */
+	rerrstr(err, sizeof(err));
+	/* undo in the same order _closepart uses */
+	if(writeback)
+		ext4_cache_write_back(p->mnt, 0);
+	if(journaled)
+		ext4_journal_stop(p->mnt);
+	if(mounted)
+		ext4_umount(p->mnt);
+	if(registered)
+		ext4_device_unregister(p->dev);
+	werrstr("mountpart: %s", err);
+	return -1;
+}
+
+/*
+ * Lock callback stored in p->oslocks.lock by openpart; aux is the Part
+ * registered as p_user.  Takes the Part's lock with qlock.
+ */
+static void
+plock(void *aux)
+{
+ Part *p;
+
+ p = aux;
+ qlock(p);
+}
+
+/*
+ * Unlock callback stored in p->oslocks.unlock by openpart; aux is the
+ * Part registered as p_user.  Releases the lock taken by plock.
+ */
+static void
+punlock(void *aux)
+{
+ Part *p;
+
+ p = aux;
+ qunlock(p);
+}
+
+/*
+ * Open the ext4 partition stored in the file dev and return its Part.
+ * When a partition with the same qid.path is already open, that Part
+ * is returned and no new one is created.  Otherwise a Part is
+ * allocated together with a one-block buffer and a copy of dev, the
+ * file system is created first when opts->fstype > 1, then it is
+ * mounted and the Part is put at the head of the sv.ps list.
+ * Returns nil with the error string prefixed by "openpart: " on failure.
+ * The whole operation runs with the sv lock held.
+ */
+Part *
+openpart(char *dev, Opts *opts)
+{
+	struct ext4_mkfs_info info;
+	struct ext4_fs fs;
+	u32int blksz;
+	Part *p;
+	Dir *d;
+	int f;
+
+	d = nil;
+	p = nil;
+	qlock(&sv);
+
+	fmtinstall(L'Ð', fmtpart);
+	fmtinstall('M', fmtpart);
+
+	f = open(dev, ORDWR);
+	if(f < 0 || (d = dirfstat(f)) == nil)
+		goto error;
+	/* see if it's already opened */
+	for(p = sv.ps; p != nil && p->qid.path != d->qid.path; p = p->next);
+	if(p == nil){ /* no? then make one */
+		/* the Part is followed by the block buffer and the device name */
+		if(getblksz(dev, &blksz) != 0 || (p = calloc(1, sizeof(*p)+blksz+strlen(dev)+1)) == nil)
+			goto error;
+
+		p->f = f;
+		p->qid = d->qid;
+		p->bdev.bdif = &p->bdif;
+		p->bdev.part_size = d->length;
+		p->bdif.open = bdopen;
+		p->bdif.bread = bdread;
+		p->bdif.bwrite = bdwrite;
+		p->bdif.close = bdclose;
+		p->bdif.ph_bsize = blksz;
+		p->bdif.ph_bcnt = d->length/blksz;
+		p->bdif.ph_bbuf = p->blkbuf;
+		p->oslocks.lock = plock;
+		p->oslocks.unlock = punlock;
+		p->oslocks.p_user = p;
+		p->bdif.p_user = p;
+
+		p->partdev = (char*)(p+1) + blksz;
+		strcpy(p->partdev, dev);
+
+		if(opts->fstype > 1){
+			memset(&fs, 0, sizeof(fs));
+			memset(&info, 0, sizeof(info));
+			info.block_size = opts->blksz;
+			/* the label is data, not a format: a '%' in it must be copied as is */
+			snprint(info.label, sizeof(info.label), "%s", opts->label);
+			info.inode_size = opts->inodesz;
+			info.inodes = opts->ninode;
+			info.journal = true;
+			if(ext4_mkfs(&fs, &p->bdev, &info, opts->fstype) < 0){
+				werrstr("mkfs: %r");
+				goto error;
+			}
+		}
+
+		if(mountpart(p, opts) != 0)
+			goto error;
+
+		p->next = sv.ps;
+		if(sv.ps != nil)
+			sv.ps->prev = p;
+		sv.ps = p;
+		p->qidmask.path = ((uvlong)sv.id++) << 32;
+		p->qidmask.type = QTDIR;
+	}else{
+		/* the existing Part keeps its own descriptor */
+		close(f);
+	}
+
+	free(d);
+	qunlock(&sv);
+
+	return p;
+
+error:
+	/* p is either nil or a Part that was never put on the list */
+	werrstr("openpart: %r");
+	if(f >= 0)
+		close(f);
+	free(d);
+	free(p);
+	qunlock(&sv);
+
+	return nil;
+}
+
+/*
+ * Tear down one partition: turn off cache write-back, stop the
+ * journal, unmount, unregister the block device, close the file,
+ * unlink p from the sv.ps list, release its groups and free it.
+ * Failures of the ext4 calls are reported on standard error and the
+ * teardown continues.  The caller must hold the sv lock.
+ */
+static void
+_closepart(Part *p)
+{
+	ext4_cache_write_back(p->mnt, 0);
+	if(ext4_journal_stop(p->mnt) < 0)
+		fprint(2, "closepart: journal %s: %r\n", p->mnt);
+	if(ext4_umount(p->mnt) < 0)
+		fprint(2, "closepart: umount %s: %r\n", p->mnt);
+	if(ext4_device_unregister(p->dev) < 0)
+		fprint(2, "closepart: unregister %s: %r\n", p->dev);
+	close(p->f);
+	/* make both neighbours skip over p */
+	if(p->prev != nil)
+		p->prev->next = p->next;
+	if(p->next != nil)
+		p->next->prev = p->prev;
+	if(p == sv.ps)
+		sv.ps = p->next;
+	freegroups(&p->groups);
+	free(p);
+}
+
+/*
+ * Close and free a single partition.  Takes the sv lock around the
+ * teardown done by _closepart; p must not be used afterwards.
+ */
+void
+closepart(Part *p)
+{
+ qlock(&sv);
+ _closepart(p);
+ qunlock(&sv);
+}
+
+/*
+ * Close every open partition.  Holds the sv lock and keeps closing the
+ * head of the list until the list is empty; _closepart advances sv.ps.
+ */
+void
+closeallparts(void)
+{
+	Part *head;
+
+	qlock(&sv);
+	for(;;){
+		head = sv.ps;
+		if(head == nil)
+			break;
+		_closepart(head);
+	}
+	qunlock(&sv);
+}
+
+/*
+ * Print usage statistics for every open partition on standard output:
+ * one line each for inodes, blocks and megabytes.  A partition whose
+ * statistics cannot be obtained is reported on standard error and
+ * skipped.  Runs with the sv lock held.
+ */
+void
+statallparts(void)
+{
+	struct ext4_mount_stats s;
+	uvlong div;
+	Part *p;
+
+	qlock(&sv);
+	for(p = sv.ps; p != nil; p = p->next){
+		if(ext4_mount_point_stats(p->mnt, &s) < 0){
+			fprint(2, "%s: %r\n", p->partdev);
+			continue;
+		}
+
+		print("%s (inodes) free %ud, used %ud, total %ud\n",
+			p->partdev, s.free_inodes_count,
+			s.inodes_count-s.free_inodes_count, s.inodes_count);
+
+		print("%s (blocks) free %llud, used %llud, total %llud, each %ud\n",
+			p->partdev, s.free_blocks_count,
+			s.blocks_count-s.free_blocks_count, s.blocks_count,
+			s.block_size);
+
+		/* number of blocks that make up one megabyte */
+		div = 1024/(s.block_size/1024);
+		print("%s (MB) free %llud, used %llud, total %llud\n",
+			p->partdev, s.free_blocks_count/div,
+			(s.blocks_count-s.free_blocks_count)/div,
+			s.blocks_count/div);
+	}
+	qunlock(&sv);
+}
+
+/*
+ * Flush the ext4 cache of every open partition.  A failing flush is
+ * reported on standard error and the remaining partitions are still
+ * processed.  Runs with the sv lock held.
+ */
+void
+syncallparts(void)
+{
+	Part *cur;
+
+	qlock(&sv);
+	cur = sv.ps;
+	while(cur != nil){
+		if(ext4_cache_flush(cur->mnt) < 0)
+			fprint(2, "%s: %r\n", cur->partdev);
+		cur = cur->next;
+	}
+	qunlock(&sv);
+}