This document keeps track of the tests performed that led to the summarized results presented in the benchmark document. Several helper scripts, detailed at the end of this document, have been necessary to obtain these results.
Performance tests have been performed on an HPE ProLiant server (a ProLiant XL230a Gen9 running two Intel(R) Xeon(R) CPU E5-2690 v3 @ 2.60GHz processors) running a Devuan beowulf Linux system. Other tests have been run from Virtual Machines (FreeBSD 12.1, Devuan 3.0.0) of a Proxmox hypervisor running on an Intel Core i5-7400 (3 GHz) based computer.
root@terre:/mnt/localdisk/Benchmark_tools# ./build_test_tree.bash SRC
1024+0 records in
1024+0 records out
1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.00395381 s, 265 MB/s
1024+0 records in
1024+0 records out
1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.00621889 s, 169 MB/s
1+0 records in
1+0 records out
1 byte copied, 0.000386102 s, 2.6 kB/s
root@terre:/mnt/localdisk/Benchmark_tools# dar -c backup -R SRC
--------------------------------------------
14 inode(s) saved
including 3 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
0 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 14
--------------------------------------------
EA saved for 1 inode(s)
FSA saved for 5 inode(s)
--------------------------------------------
root@terre:/mnt/localdisk/Benchmark_tools# mkdir DST
root@terre:/mnt/localdisk/Benchmark_tools# dar -x backup -R DST
--------------------------------------------
14 inode(s) restored
including 3 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
0 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 14
--------------------------------------------
EA restored for 1 inode(s)
FSA restored for 1 inode(s)
--------------------------------------------
root@terre:/mnt/localdisk/Benchmark_tools#
We simply performed a backup of the SRC
directory with dar's default options, then
restored this backup into the DST
directory. Let's now compare SRC
and DST
contents:
root@terre:/mnt/localdisk# du -s SRC DST
2068 SRC
1048 DST
root@terre:/mnt/localdisk#
The space used by DST
is less than the space used by SRC
! At first
we could believe that not all data could be restored; let's look for the explanation:
root@terre:/mnt/localdisk#ls -iRl SRC DST
DST:
total 1044
414844 drwxr-xr-x 2 root root 4096 Oct 28 11:09 SUB
414850 drwxr-xr-x 2 root root 4096 Oct 22 11:09 dev
414848 brw-r--r-- 1 root root 2, 1 Oct 28 11:09 fd1
414842 crw-r--r-- 1 root root 3, 1 Oct 28 11:09 null
414841 prw-r--r-- 2 root root 0 Oct 28 11:09 pipe
414840 -rw-rwxr--+ 1 root root 1048576 Oct 28 11:09 plain_zeroed
414849 -rw-r--r-- 1 nobody root 1048582 Oct 28 11:09 random
414843 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 sparse_file
DST/SUB:
total 4
414841 prw-r--r-- 2 root root 0 Oct 28 11:09 hard_linked_pipe
414846 srw-rw-rw- 2 root root 0 Oct 12 23:00 hard_linked_socket
414843 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 hard_linked_sparse_file
414845 lrwxrwxrwx 1 root root 6 Oct 28 11:09 symlink-broken -> random
414847 lrwxrwxrwx 1 bin daemon 9 Oct 28 11:09 symlink-valid -> ../random
DST/dev:
total 0
414846 srw-rw-rw- 2 root root 0 Oct 12 23:00 log
SRC:
total 2064
411386 drwxr-xr-x 2 root root 4096 Oct 28 11:09 SUB
414836 drwxr-xr-x 2 root root 4096 Oct 22 11:09 dev
414835 brw-r--r-- 1 root root 2, 1 Oct 28 11:09 fd1
414834 crw-r--r-- 1 root root 3, 1 Oct 28 11:09 null
414832 prw-r--r-- 2 root root 0 Oct 28 11:09 pipe
414826 -rw-rwxr--+ 1 root root 1048576 Oct 28 11:09 plain_zeroed
414827 -rw-r--r-- 1 nobody root 1048582 Oct 28 11:09 random
414828 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 sparse_file
SRC/SUB:
total 4
414832 prw-r--r-- 2 root root 0 Oct 28 11:09 hard_linked_pipe
414837 srw-rw-rw- 2 root root 0 Oct 12 23:00 hard_linked_socket
414828 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 hard_linked_sparse_file
414830 lrwxrwxrwx 1 root root 6 Oct 28 11:09 symlink-broken -> random
414831 lrwxrwxrwx 1 bin daemon 9 Oct 28 11:09 symlink-valid -> ../random
SRC/dev:
total 0
414837 srw-rw-rw- 2 root root 0 Oct 12 23:00 log
root@terre:/mnt/localdisk#
All files are present in DST
and use the expected space usage, as reported by the ls
command.
We can also see that the hard linked inodes were properly restored for plain file, named pipe and unix socket: the
inode number in the first column is the same (see colorized output above).
Maybe something is missing elsewhere?
root@terre:/mnt/localdisk# getfacl SRC/plain_zeroed DST/plain_zeroed
# file: SRC/plain_zeroed
# owner: root
# group: root
user::rw-
user:nobody:rwx
group::r--
mask::rwx
other::r--
# file: DST/plain_zeroed
# owner: root
# group: root
user::rw-
user:nobody:rwx
group::r--
mask::rwx
other::r--
root@terre:/mnt/localdisk# getfattr -d SRC/plain_zeroed DST/plain_zeroed
# file: SRC/plain_zeroed
user.hello="hello world!!!"
# file: DST/plain_zeroed
user.hello="hello world!!!"
root@terre:/mnt/localdisk/Benchmark_tools# lsattr SRC/plain_zeroed DST/plain_zeroed
s---i-d-------e---- SRC/plain_zeroed
s---i-d-------e---- DST/plain_zeroed
root@terre:/mnt/localdisk/Benchmark_tools#
To summarize:
So what? Let's rerun du
file by file:
root@terre:/mnt/localdisk/Benchmark_tools# du -B1 SRC/* DST/*
8192 SRC/SUB
4096 SRC/dev
0 SRC/fd1
0 SRC/null
1048576 SRC/plain_zeroed
1052672 SRC/random
8192 DST/SUB
4096 DST/dev
0 DST/fd1
0 DST/null
4096 DST/plain_zeroed
1052672 DST/random
root@terre:/mnt/localdisk/Benchmark_tools# ls -l SRC/plain_zeroed DST/plain_zeroed
-rw-rwxr--+ 1 root root 1048576 Oct 21 18:40 DST/plain_zeroed
-rw-rwxr--+ 1 root root 1048576 Oct 21 18:40 SRC/plain_zeroed
root@terre:/mnt/localdisk/Benchmark_tools#
OK, here is the explanation: plain_zeroed
file was using 1048576 bytes of disk space in SRC
and consumes only 4096 bytes in DST, but its file size is still officially 1048576; it has thus now become
a sparse file (not all zeroed bytes are stored).
root@terre:/mnt/localdisk/Benchmark_tools# diff -s SRC/plain_zeroed DST/plain_zeroed
Files SRC/plain_zeroed and DST/plain_zeroed are identical
root@terre:/mnt/localdisk/Benchmark_tools#
But nothing changes from the user point of view, the restoration process with dar just optimized the space usage.
Let's continue by checking the inode dates. As you know, Unix inodes have several dates:
The ls -iRl
command we used so far only shows the mtime date, and moreover with
a time accuracy of only one minute, while modern systems provide nanosecond precision. For that
reason we will use the stat
command instead, to have all dates at the system time accuracy:
root@terre:/mnt/localdisk/Benchmark_tools# stat SRC/random DST/random
File: SRC/random
Size: 1048576 Blocks: 2048 IO Block: 4096 regular file
Device: 802h/2050d Inode: 414840 Links: 1
Access: (0644/-rw-r--r--) Uid: (65534/ nobody) Gid: ( 0/ root)
Access: 2020-10-22 12:13:01.813319506 +0200
Modify: 2020-10-22 12:12:57.765328555 +0200
Change: 2020-10-22 12:12:59.805323991 +0200
Birth: -
File: DST/random
Size: 1048576 Blocks: 2048 IO Block: 4096 regular file
Device: 802h/2050d Inode: 414889 Links: 1
Access: (0644/-rw-r--r--) Uid: (65534/ nobody) Gid: ( 0/ root)
Access: 2020-10-22 12:13:01.813319506 +0200
Modify: 2020-10-22 12:12:57.765328555 +0200
Change: 2020-10-22 12:14:34.877131738 +0200
Birth: -
root@terre:/mnt/localdisk/Benchmark_tools#
From the above output we see that:
As we targeted this benchmark mainly for Linux, which does not yet have the btime
available
(well, some Linux file systems support
btime but access to it is not yet fully available to applications), we will thus momentarily switch to a BSD system
to play with btime
. BSD systems include Mac OS X, FreeBSD, NetBSD, DragonFly BSD,... we will use FreeBSD here.
Under FreeBSD, the output of the stat
command is not as easy to read as under Linux; however it is
very flexible, which we will leverage to mimic the Linux output:
root@FreeBSD:~denis # which mystat
mystat: aliased to stat -f "%N%nAccess: %Sa%nModify: %Sm%nChange: %Sc%nBirth: %SB%n" !*
root@FreeBSD:~denis # mystat SRC/random
SRC/random
Access: Oct 27 13:28:41 2020
Modify: Oct 22 15:34:07 2020
Change: Oct 22 15:34:09 2020
Birth: Oct 22 15:34:07 2020
root@FreeBSD:~denis # dar -c backup -R SRC -q
root@FreeBSD:~denis # mkdir DST
root@FreeBSD:~denis # dar -x backup -R DST -q
root@FreeBSD:~denis # mystat DST/random
DST/random
Access: Oct 27 13:28:41 2020
Modify: Oct 22 15:34:07 2020
Change: Oct 27 13:31:50 2020
Birth: Oct 22 15:34:07 2020
root@FreeBSD:~denis #
In conclusion dar also saves and restores btime
properly.
Let's do the same as we did previously, now using rsync. We start by copying the SRC
directory to DST
:
root@terre:/mnt/localdisk/Benchmark_tools# chattr -i DST/plain_zeroed
root@terre:/mnt/localdisk/Benchmark_tools# rm -rf DST
root@terre:/mnt/localdisk/Benchmark_tools# mkdir DST
root@terre:/mnt/localdisk/Benchmark_tools# rsync -arvHAXS SRC/* DST
sending incremental file list
created directory DST
fd1
null
pipe
plain_zeroed
random
SUB/
SUB/hard_linked_pipe => pipe
SUB/hard_linked_socket
SUB/hard_linked_sparse_file
SUB/symlink-broken -> random
SUB/symlink-valid -> ../random
dev/
dev/log => SUB/hard_linked_socket
sparse_file => SUB/hard_linked_sparse_file
sent 12,340,852 bytes received 198 bytes 24,682,100.00 bytes/sec
total size is 22,577,173 speedup is 1.83
root@terre:/mnt/localdisk/Benchmark_tools#
First note: the backup and restoration are done in one step, where dar was decoupling the backup operation
from the restoration operation. The resulting backup needs no software to be restored (DST
is a copy of SRC
). For dar to reach the same result (without using storage for the backup) this
implies two dar commands: dar -c - -R SRC | dar -x - --sequential-read -R DST
. The situation is
similar with tar
, you need two commands to perform the same task: tar -cf - | tar -xf -
root@terre:/mnt/localdisk/Benchmark_tools# du -s SRC DST
2056 SRC
1028 DST
root@terre:/mnt/localdisk/Benchmark_tools#
Here too, the restored data uses less space than the original data: sparse files have been taken into account (this requires specifying the -S option) and space optimization of non-sparse files is performed.
root@terre:/mnt/localdisk/Benchmark_tools# ls -iRl SRC DST
DST:
total 12060
414843 drwxr-xr-x 2 root root 4096 Oct 28 11:09 SUB
414844 drwxr-xr-x 2 root root 4096 Oct 22 11:09 dev
414840 brw-r--r-- 1 root root 2, 1 Oct 28 11:09 fd1
414841 crw-r--r-- 1 root root 3, 1 Oct 28 11:09 null
414842 prw-r--r-- 2 root root 0 Oct 28 11:09 pipe
414848 -rw-rwxr--+ 1 root root 1048576 Oct 28 11:09 plain_zeroed
414849 -rw-r--r-- 1 nobody root 1048582 Oct 28 11:09 random
414850 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 sparse_file
DST/SUB:
total 10000
414842 prw-r--r-- 2 root root 0 Oct 28 11:09 hard_linked_pipe
414845 srw-rw-rw- 2 root root 0 Oct 12 23:00 hard_linked_socket
414850 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 hard_linked_sparse_file
414846 lrwxrwxrwx 1 root root 6 Oct 28 11:09 symlink-broken -> random
414847 lrwxrwxrwx 1 bin daemon 9 Oct 28 11:09 symlink-valid -> ../random
DST/dev:
total 0
414845 srw-rw-rw- 2 root root 0 Oct 12 23:00 log
SRC:
total 2064
411386 drwxr-xr-x 2 root root 4096 Oct 28 11:09 SUB
414836 drwxr-xr-x 2 root root 4096 Oct 22 11:09 dev
414835 brw-r--r-- 1 root root 2, 1 Oct 28 11:09 fd1
414834 crw-r--r-- 1 root root 3, 1 Oct 28 11:09 null
414832 prw-r--r-- 2 root root 0 Oct 28 11:09 pipe
414826 -rw-rwxr--+ 1 root root 1048576 Oct 28 11:09 plain_zeroed
414827 -rw-r--r-- 1 nobody root 1048582 Oct 28 11:09 random
414828 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 sparse_file
SRC/SUB:
total 4
414832 prw-r--r-- 2 root root 0 Oct 28 11:09 hard_linked_pipe
414837 srw-rw-rw- 2 root root 0 Oct 12 23:00 hard_linked_socket
414828 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 hard_linked_sparse_file
414830 lrwxrwxrwx 1 root root 6 Oct 28 11:09 symlink-broken -> random
414831 lrwxrwxrwx 1 bin daemon 9 Oct 28 11:09 symlink-valid -> ../random
SRC/dev:
total 0
414837 srw-rw-rw- 2 root root 0 Oct 12 23:00 log
root@terre:/mnt/localdisk/Benchmark_tools#
All files are present in DST
and use the expected space usage, as reported by the ls
.
We can also see that all three hard linked inodes (plain file, socket and named pipe) are restored properly.
So we can suspect the cause of the size difference to be linked with sparse files:
Let's now check file's metadata:
root@terre:/mnt/localdisk/Benchmark_tools# getfacl SRC/plain_zeroed DST/plain_zeroed
# file: SRC/plain_zeroed
# owner: root
# group: root
user::rw-
user:nobody:rwx
group::r--
mask::rwx
other::r--
# file: DST/plain_zeroed
# owner: root
# group: root
user::rw-
user:nobody:rwx
group::r--
mask::rwx
other::r--
root@terre:/mnt/localdisk/Benchmark_tools# getfattr -d SRC/plain_zeroed DST/plain_zeroed
# file: SRC/plain_zeroed
user.hello="hello world!!!"
# file: DST/plain_zeroed
user.hello="hello world!!!"
root@terre:/mnt/localdisk/Benchmark_tools# lsattr SRC/plain_zeroed DST/plain_zeroed
s---i-d-------e---- SRC/plain_zeroed
--------------e---- DST/plain_zeroed
root@terre:/mnt/localdisk/Benchmark_tools#stat SRC/random DST/random
File: SRC/random
Size: 1048582 Blocks: 2056 IO Block: 4096 regular file
Device: 802h/2050d Inode: 414827 Links: 1
Access: (0644/-rw-r--r--) Uid: (65534/ nobody) Gid: ( 0/ root)
Access: 2020-10-28 11:09:59.977926733 +0100
Modify: 2020-10-28 11:09:57.973931318 +0100
Change: 2020-10-28 11:09:57.973931318 +0100
Birth: -
File: DST/random
Size: 1048582 Blocks: 2056 IO Block: 4096 regular file
Device: 802h/2050d Inode: 414849 Links: 1
Access: (0644/-rw-r--r--) Uid: (65534/ nobody) Gid: ( 0/ root)
Access: 2020-10-28 12:07:53.622841733 +0100
Modify: 2020-10-28 11:09:57.973931318 +0100
Change: 2020-10-28 12:07:53.622841733 +0100
Birth: -
root@terre:/mnt/localdisk/Benchmark_tools#
So in summary:
But
For btime as we did before, let's test under a FreeBSD system:
root@FreeBSD:~denis # rm -rf DST
root@FreeBSD:/home/denis # which mystat
mystat: aliased to stat -f "%N%nAccess: %Sa%nModify: %Sm%nChange: %Sc%nBirth: %SB%n" !*
root@FreeBSD:/home/denis # mystat SRC/random
SRC/random
Access: Oct 27 14:27:59 2020
Modify: Oct 22 15:34:07 2020
Change: Oct 22 15:34:09 2020
Birth: Oct 22 15:34:07 2020
root@FreeBSD:/home/denis # mkdir DST
root@FreeBSD:/home/denis # rsync -arv SRC/* DST
sending incremental file list
fd1
null
pipe
plain_zeroed
random
sparse_file
SUB/
SUB/hard_linked_socket
SUB/hard_linked_sparse_file
SUB/symlink-broken -> random
SUB/symlink-valid -> ../random
dev/
dev/log -> /var/run/log
sent 22,583,283 bytes received 129 bytes 45,166,824.00 bytes/sec
total size is 22,577,179 speedup is 1.00
root@FreeBSD:/home/denis # mystat DST/random
DST/random
Access: Oct 27 14:28:53 2020
Modify: Oct 22 15:34:07 2020
Change: Oct 27 14:28:53 2020
Birth: Oct 22 15:34:07 2020
root@FreeBSD:/home/denis #
So, birthtime is properly restored.
As done previously, let's save and restore the SRC
directory to DST
... Note that by default
no sparse file is taken into account (this is why we added the -S option
), same with ACLs (so we added
the --acl
option) and Extended Attributes (unless --xattrs
is added). The
tar command line thus becomes a bit longer:
root@terre:/mnt/localdisk/Benchmark_tools# rm -rf DST
root@terre:/mnt/localdisk/Benchmark_tools# cd SRC
root@terre:/mnt/localdisk/Benchmark_tools/SRC# tar --acl --xattrs -cSf ../backup.tar *
tar: SUB/hard_linked_socket: socket ignored
tar: dev/log: socket ignored
root@terre:/mnt/localdisk/Benchmark_tools/SRC# cd ../
root@terre:/mnt/localdisk/Benchmark_tools# mkdir DST
root@terre:/mnt/localdisk/Benchmark_tools# cd DST
root@terre:/mnt/localdisk/Benchmark_tools/DST# tar --acl --xattrs -xSf ../backup.tar
root@terre:/mnt/localdisk/Benchmark_tools/DST# cd ..
root@terre:/mnt/localdisk/Benchmark_tools#
Now let's compare the restored data with the original:
root@terre:/mnt/localdisk/Benchmark_tools# du -s SRC DST
2068 SRC
2068 DST
root@terre:/mnt/localdisk/Benchmark_tools#
The sparse file has been properly restored (thanks to the -S option
for that) but no space optimization
has been performed.
root@terre:/mnt/localdisk/Benchmark_tools# ls -iRl SRC DST
DST:
total 12060
414841 drwxr-xr-x 2 root root 4096 Oct 28 11:09 SUB
414846 drwxr-xr-x 2 root root 4096 Oct 22 11:09 dev
414847 brw-r--r-- 1 root root 2, 1 Oct 28 11:09 fd1
414848 crw-r--r-- 1 root root 3, 1 Oct 28 11:09 null
414849 prw-r--r-- 1 root root 0 Oct 28 11:09 pipe
414850 -rw-rwxr-- 1 root root 1048576 Oct 28 11:09 plain_zeroed
414852 -rw-r--r-- 1 nobody root 1048582 Oct 28 11:09 random
414843 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 sparse_file
DST/SUB:
total 10000
414845 prw-r--r-- 1 root root 0 Oct 28 11:09 hard_linked_pipe
414843 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 hard_linked_sparse_file
414842 lrwxrwxrwx 1 root root 6 Oct 28 11:09 symlink-broken -> random
414844 lrwxrwxrwx 1 bin daemon 9 Oct 28 11:09 symlink-valid -> ../random
DST/dev:
total 0
SRC:
total 2064
411386 drwxr-xr-x 2 root root 4096 Oct 28 11:09 SUB
414836 drwxr-xr-x 2 root root 4096 Oct 22 11:09 dev
414835 brw-r--r-- 1 root root 2, 1 Oct 28 11:09 fd1
414834 crw-r--r-- 1 root root 3, 1 Oct 28 11:09 null
414832 prw-r--r-- 2 root root 0 Oct 28 11:09 pipe
414826 -rw-rwxr--+ 1 root root 1048576 Oct 28 11:09 plain_zeroed
414827 -rw-r--r-- 1 nobody root 1048582 Oct 28 11:09 random
414828 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 sparse_file
SRC/SUB:
total 4
414832 prw-r--r-- 2 root root 0 Oct 28 11:09 hard_linked_pipe
414837 srw-rw-rw- 2 root root 0 Oct 12 23:00 hard_linked_socket
414828 -rw-r--r-- 2 root root 10240000 Oct 28 11:09 hard_linked_sparse_file
414830 lrwxrwxrwx 1 root root 6 Oct 28 11:09 symlink-broken -> random
414831 lrwxrwxrwx 1 bin daemon 9 Oct 28 11:09 symlink-valid -> ../random
SRC/dev:
total 0
414837 srw-rw-rw- 2 root root 0 Oct 12 23:00 log
root@terre:/mnt/localdisk/Benchmark_tools#
The warning was not in vain, SUB/hard_linked_socket
and log
are missing in DST
.
This is however a minor problem, as unix sockets usually get recreated by the process using them. However
we might have some permissions and ownership to set back by hand. A possible use case is the syslog
daemon,
when made available to a chrooted process or container (MTA, or other network service).
The second problem is a bit more annoying: the hard linked fifo (aka named pipe)
is silently restored as two independent named pipes (the inode numbers are different in the first column
for pipe
and SUB/hard_linked_pipe
and their respective link count was 2
in SRC
but is now 1
in DST
). If two processes in different namespaces or
chrooted environments exchange data by means of such a hard linked pipe, then after restoration, if you are not
aware of this failure, it will be difficult to identify why the two processes are just locked out, one
waiting for data that will never come from the pipe, the other stuck waiting for the pipe to be read.
Let's continue by checking the file's metadata:
root@terre:/mnt/localdisk/Benchmark_tools# getfacl SRC/plain_zeroed DST/plain_zeroed
# file: SRC/plain_zeroed
# owner: root
# group: root
user::rw-
user:nobody:rwx
group::r--
mask::rwx
other::r--
# file: DST/plain_zeroed
# owner: root
# group: root
user::rw-
user:nobody:rwx
group::r--
mask::rwx
other::r--
root@terre:/mnt/localdisk/Benchmark_tools# getfattr -d SRC/plain_zeroed DST/plain_zeroed
# file: SRC/plain_zeroed
user.hello="hello world!!!"
# file: DST/plain_zeroed
user.hello="hello world!!!"
root@terre:/mnt/localdisk/Benchmark_tools# lsattr SRC/plain_zeroed DST/plain_zeroed
s---i-d-------e---- SRC/plain_zeroed
--------------e---- DST/plain_zeroed
root@terre:/mnt/localdisk/Benchmark_tools#
Note that without --xattrs
at creation time the timestamp accuracy of tar
is 1 second:
root@terre:/mnt/localdisk/Benchmark_tools# stat SRC/random DST/random
File: SRC/random
Size: 1048576 Blocks: 2048 IO Block: 4096 regular file
Device: 802h/2050d Inode: 414841 Links: 1
Access: (0644/-rw-r--r--) Uid: (65534/ nobody) Gid: ( 0/ root)
From the above output we see that:
--xattrs
to take into account today's common system time accuracy of one nanosecond
But
For the last date, birthtime, we will again perform the test under FreeBSD:
root@FreeBSD:~denis # which mystat
mystat: aliased to stat -f "%N%nAccess: %Sa%nModify: %Sm%nChange: %Sc%nBirth: %SB%n" !*
root@FreeBSD:~denis # mystat SRC/random
SRC/random
Access: Oct 27 19:40:13 2020
Modify: Oct 22 15:34:07 2020
Change: Oct 22 15:34:09 2020
Birth: Oct 22 15:34:07 2020
root@FreeBSD:~denis # cd SRC
root@FreeBSD:~denis/SRC # gtar -cf ../backup.tar random
root@FreeBSD:~denis/SRC # cd ..
root@FreeBSD:~denis # mkdir DST
root@FreeBSD:~denis # cd DST
root@FreeBSD:~denis/DST # tar -xf ../backup.tar
root@FreeBSD:~denis/DST # cd ..
root@FreeBSD:~denis # mystat DST/random
DST/random
Access: Oct 28 15:43:30 2020
Modify: Oct 22 15:34:07 2020
Change: Oct 28 15:43:30 2020
Birth: Oct 22 15:34:07 2020
root@FreeBSD:~denis #
gtar saved and restored the birthtime
To evaluate this feature, we will first create two files A.txt and B.txt and make a first backup. Then we remove A.txt and add C.txt, then make a second backup. We should be able to restore the data in both states (A+B and B+C). To simplify the operation we use the historization_feature script described at the end of this document.
root@terre:/mnt/memdisk# rm -rf SRC
root@terre:/mnt/memdisk# ./historization_feature SRC phase1
root@terre:/mnt/memdisk# dar -c full -g SRC -q
root@terre:/mnt/memdisk# ./historization_feature SRC phase2
root@terre:/mnt/memdisk# dar -c diff -A full -g SRC -q
root@terre:/mnt/memdisk# mkdir DST
root@terre:/mnt/memdisk# dar -x full -R DST -q
root@terre:/mnt/memdisk# ls -lR DST
DST:
total 0
drwxr-xr-x 2 root root 80 Nov 6 18:37 SRC
DST/SRC:
total 8
-rw-r--r-- 1 root root 13 Nov 6 18:37 A.txt
-rw-r--r-- 1 root root 24 Nov 6 18:37 B.txt
root@terre:/mnt/memdisk# dar -x diff -R DST -w -q
root@terre:/mnt/memdisk# ls -lR DST
DST:
total 0
drwxr-xr-x 2 root root 80 Nov 6 18:38 SRC
DST/SRC:
total 8
-rw-r--r-- 1 root root 24 Nov 6 18:37 B.txt
-rw-r--r-- 1 root root 21 Nov 6 18:38 C.txt
root@terre:/mnt/memdisk#
Historization is present: we can get back both saved states from the backups.
In complement, dar proposes a manager, dar_manager, to easily locate a file's status across the archives the database has been fed with, as well as the file's data present in each archive:
root@terre:/mnt/memdisk# dar_manager -C base.dmd
root@terre:/mnt/memdisk# dar_manager -B base.dmd -A full
root@terre:/mnt/memdisk# dar_manager -B base.dmd -A diff
root@terre:/mnt/memdisk# dar_manager -B base.dmd -f SRC/A.txt
1 Fri Nov 6 18:37:51 2020 saved absent
2 Fri Nov 6 18:38:04 2020 removed absent
root@terre:/mnt/memdisk# dar_manager -B base.dmd -f SRC/B.txt
1 Fri Nov 6 18:37:51 2020 saved absent
2 Fri Nov 6 18:37:51 2020 present absent
root@terre:/mnt/memdisk# dar_manager -B base.dmd -f SRC/C.txt
2 Fri Nov 6 18:38:04 2020 saved absent
root@terre:/mnt/memdisk# dar_manager -B base.dmd -l
dar path :
dar options :
database version: 5
compression used: gzip
archive # | path | basename
------------+--------------+---------------
1 . full
2 . diff
root@terre:/mnt/memdisk# dar_manager -B base.dmd -u 1
[ Saved ][ ] SRC/B.txt
[ Saved ][ ] SRC/A.txt
root@terre:/mnt/memdisk# dar_manager -B base.dmd -u 2
[ Saved ][ ] SRC
[ Saved ][ ] SRC/C.txt
root@terre:/mnt/memdisk#
dar_manager can even take care of invoking dar for you, as many times as necessary, to get the files' status at a given date for a given set or subset of the saved files:
root@terre:/mnt/memdisk# dar_manager -v -B base.dmd -e "-R DST -w" -r SRC
Decompressing and loading database to memory...
Looking in archives for requested files, classifying files archive by archive...
Checking chronological ordering of files between the archives...
File recorded as removed at this date in database: SRC/A.txt
CALLING DAR: restoring 1 files from archive ./full using anonymous pipe to transmit configuration to the dar process
Arguments sent through anonymous pipe are:
dar -x ./full -R DST -w -g SRC/B.txt
--------------------------------------------
2 inode(s) restored
including 0 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
1 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 3
--------------------------------------------
EA restored for 0 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
CALLING DAR: restoring 2 files from archive ./diff using anonymous pipe to transmit configuration to the dar process
Arguments sent through anonymous pipe are:
dar -x ./diff -R DST -w -g SRC -g SRC/C.txt
Error while restoring /mnt/memdisk/DST/SRC/A.txt : Cannot remove non-existent file from filesystem: /mnt/memdisk/DST/SRC/A.txt
--------------------------------------------
2 inode(s) restored
including 0 hard link(s)
1 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
0 inode(s) ignored (excluded by filters)
1 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 4
--------------------------------------------
EA restored for 0 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
Final memory cleanup...
All files asked could not be restored
DAR sub-process has terminated with exit code 5 Continue anyway ? [return = YES | Esc = NO]
Continuing...
root@terre:/mnt/memdisk# ls -lR DST
DST:
total 0
drwxr-xr-x 2 root root 80 Nov 6 18:38 SRC
DST/SRC:
total 8
-rw-r--r-- 1 root root 24 Nov 6 18:37 B.txt
-rw-r--r-- 1 root root 21 Nov 6 18:38 C.txt
root@terre:/mnt/memdisk#
root@terre:/mnt/memdisk# ./historization_feature SRC phase1
root@terre:/mnt/memdisk# rsync -arvHAX SRC DST
sending incremental file list
created directory DST
SRC/
SRC/A.txt
SRC/B.txt
sent 229 bytes received 84 bytes 626.00 bytes/sec
total size is 37 speedup is 0.12
root@terre:/mnt/memdisk# ./historization_feature SRC phase2
root@terre:/mnt/memdisk# rsync -arvHAX SRC DST
sending incremental file list
SRC/
SRC/C.txt
sent 172 bytes received 39 bytes 422.00 bytes/sec
total size is 45 speedup is 0.21
root@terre:/mnt/memdisk# ls -l
total 4
drwxr-xr-x 3 root root 60 Nov 6 17:06 DST
drwxr-xr-x 2 root root 80 Nov 6 17:06 SRC
-rwxr--r-- 1 root root 589 Nov 6 16:32 historization_feature
root@terre:/mnt/memdisk# ls -l DST
total 0
drwxr-xr-x 2 root root 100 Nov 6 17:06 SRC
root@terre:/mnt/memdisk# ls -l DST/SRC
total 12
-rw-r--r-- 1 root root 13 Nov 6 17:05 A.txt
-rw-r--r-- 1 root root 24 Nov 6 17:05 B.txt
-rw-r--r-- 1 root root 21 Nov 6 17:06 C.txt
root@terre:/mnt/memdisk# rsync -arvHAX --delete SRC DST
sending incremental file list
deleting SRC/A.txt
sent 101 bytes received 26 bytes 254.00 bytes/sec
total size is 45 speedup is 0.35
root@terre:/mnt/memdisk# ls -l DST/SRC
total 8
-rw-r--r-- 1 root root 24 Nov 6 17:05 B.txt
-rw-r--r-- 1 root root 21 Nov 6 17:06 C.txt
root@terre:/mnt/memdisk#
The "backup" contains all three files, A.txt, B.txt and C.txt, while the first and the latter never existed at the same time. Such a backup gives neither the phase1 state nor the phase2 state.
We added the --delete
option and as a result we got the backup
to be in the phase2 state only. But then we cannot restore to the phase1 state, as the
file A.txt has been deleted from the backup.
To have both states with rsync, we should call rsync to a different destination directory at each new backup time, which would consume a lot of space and would also defeat one of the main features of rsync which is its ability to synchronize two directories exchanging only the minimal information that was modified.
root@terre:/mnt/memdisk# rmdir SRC
root@terre:/mnt/memdisk# ./historization_feature SRC phase1
root@terre:/mnt/memdisk# tar --listed-incremental=snapshot.file -cf full.tar SRC
root@terre:/mnt/memdisk# ./historization_feature SRC phase2
root@terre:/mnt/memdisk# tar --listed-incremental=snapshot.file -cf diff.tar SRC
root@terre:/mnt/memdisk# mkdir DST
root@terre:/mnt/memdisk# cd DST
root@terre:/mnt/memdisk/DST# tar --listed-incremental=snapshot.file -xf ../full.tar
root@terre:/mnt/memdisk/DST# ls -l SRC
total 8
-rw-r--r-- 1 root root 13 Nov 6 18:20 A.txt
-rw-r--r-- 1 root root 24 Nov 6 18:20 B.txt
root@terre:/mnt/memdisk/DST# tar --listed-incremental=snapshot.file -xf ../diff.tar
root@terre:/mnt/memdisk/DST# ls -l SRC
total 8
-rw-r--r-- 1 root root 24 Nov 6 18:20 B.txt
-rw-r--r-- 1 root root 21 Nov 6 18:21 C.txt
root@terre:/mnt/memdisk/DST#
We could restore both the phase1 and phase2 states from backup: historization is available with tar.
We want to save /lib
except the content of /lib/modules
:
root@terre:/mnt/memdisk# dar -c backup -R /lib -P modules -vs -q
Skipping file: /lib/modules
root@terre:/mnt/memdisk#
What if we want to exclude all of /lib/modules
except
/lib/modules/4.19.0-12-amd64
?
root@terre:/mnt/memdisk# rm backup.1.dar
rm: remove regular file 'backup.1.dar'? y
root@terre:/mnt/memdisk# dar -c backup -R /lib -am -P modules -g modules/4.19.0-12-amd64 -vs -q
Skipping file: /lib/modules/4.19.0-11-amd64
Skipping file: /lib/modules/4.19.0-10-amd64
root@terre:/mnt/memdisk#
OK, we can mix included directories and excluded directories
root@terre:/mnt/memdisk# rm -rf DST
root@terre:/mnt/memdisk# mkdir DST
root@terre:/mnt/memdisk# rsync -arHAXS --exclude /lib/modules /lib DST
root@terre:/mnt/memdisk# ls -ld DST/lib/m*
drwxr-xr-x 2 root root 80 Jun 11 23:33 DST/lib/modprobe.d
root@terre:/mnt/memdisk#
root@terre:/mnt/memdisk# ls -l DST/lib/modules
ls: cannot access 'DST/lib/modules': No such file or directory
root@terre:/mnt/memdisk#
We could exclude /lib/modules
as expected. As previously, let's exclude it
except /lib/modules/4.19.0-12-amd64
:
root@terre:/mnt/memdisk# rm -rf DST
root@terre:/mnt/memdisk# rsync -arHAXS -f "+ /lib/modules" -f "- /lib/modules/4.19.0-12-amd64" /lib DST
root@terre:/mnt/memdisk# la DST/lib/modules/
total 0
drwxr-xr-x 4 root root 80 Oct 22 10:33 .
drwxr-xr-x 19 root root 420 Oct 22 11:20 ..
drwxr-xr-x 3 root root 280 Aug 8 12:58 4.19.0-10-amd64
drwxr-xr-x 3 root root 280 Oct 12 11:25 4.19.0-11-amd64
root@terre:/mnt/memdisk#
OK, we can mix included directories and excluded directories
Let's save /lib while excluding /lib/modules again:
root@terre:/mnt/memdisk# tar --exclude /lib/modules -cf backup.tar /lib
tar: Removing leading `/' from member names
root@terre:/mnt/memdisk# tar -tf backup.tar | grep modules
root@terre:/mnt/memdisk#
Now let's exclude /lib/modules
except /lib/modules/4.19.0-12-amd64
:
root@terre:/mnt/memdisk# tar -cf backup.tar /lib/modules/4.19.0-12-amd64/ --exclude /lib/modules /lib
tar: Removing leading `/' from member names
tar: Removing leading `/' from hard link targets
root@terre:/mnt/memdisk# tar -tf backup.tar | wc -l
6017
root@terre:/mnt/memdisk# tar -tf backup.tar | grep -v "lib/modules/4.19.0-12-amd64" | wc -l
1626
root@terre:/mnt/memdisk# tar -tf backup.tar | grep "lib/modules/4.19.0-12-amd64" | wc -l
4391
root@terre:/mnt/memdisk# tar -tf backup.tar | grep "lib/modules" | wc -l
4391
root@terre:/mnt/memdisk#
The backup contains a total of 6017 entries, 1626 are outside the lib/modules/4.19.0-12-amd64
directory, the rest is all in that directory, and nothing else is found in lib/modules
while there were lib/modules/4.19.0-11-amd64
and lib/modules/4.19.0-10-amd64
subdirectories. We can thus mix included and excluded directories.
root@terre:/mnt/memdisk# dar -c backup -R /lib -X "*.ko"
--------------------------------------------
4122 inode(s) saved
including 0 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
10677 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 14799
--------------------------------------------
EA saved for 0 inode(s)
FSA saved for 3945 inode(s)
--------------------------------------------
root@terre:/mnt/memdisk# mkdir DST
root@terre:/mnt/memdisk# dar -x backup -R DST --fsa-scope none
--------------------------------------------
4122 inode(s) restored
including 0 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
0 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 4122
--------------------------------------------
EA restored for 0 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
root@terre:/mnt/memdisk# find DST -name "*.ko" -ls
root@terre:/mnt/memdisk#
We could exclude all files having the ko
extension, what if we do not
want to exclude those that start with ext
?
root@terre:/mnt/memdisk# dar -c backup -R /lib -am -X "*.ko" -I "ext*"
--------------------------------------------
4128 inode(s) saved
including 0 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
10671 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 14799
--------------------------------------------
EA saved for 0 inode(s)
FSA saved for 3951 inode(s)
--------------------------------------------
root@terre:/mnt/memdisk# rm -rf DST
root@terre:/mnt/memdisk# mkdir DST
root@terre:/mnt/memdisk# dar -x backup -R DST --fsa-scope none -q
root@terre:/mnt/memdisk# find DST -name "*.ko" -print
DST/modules/4.19.0-10-amd64/kernel/fs/ext4/ext4.ko
DST/modules/4.19.0-10-amd64/kernel/drivers/extcon/extcon-core.ko
DST/modules/4.19.0-11-amd64/kernel/fs/ext4/ext4.ko
DST/modules/4.19.0-11-amd64/kernel/drivers/extcon/extcon-core.ko
DST/modules/4.19.0-12-amd64/kernel/fs/ext4/ext4.ko
DST/modules/4.19.0-12-amd64/kernel/drivers/extcon/extcon-core.ko
root@terre:/mnt/memdisk#
OK, we got what we wanted
root@terre:/mnt/memdisk# rm -rf DST
root@terre:/mnt/memdisk# rsync -arHAXS -f "- *.ko" /lib DST
root@terre:/mnt/memdisk# find DST -name "*.ko" -print
root@terre:/mnt/memdisk# ls DST
lib
root@terre:/mnt/memdisk#
Same as previously, we don't want to exclude ko
files starting by ext
:
root@terre:/mnt/memdisk# rm -rf DST
root@terre:/mnt/memdisk# rsync -arHAXS -f "+ ext*" -f "- *.ko" /lib DST
root@terre:/mnt/memdisk# find DST -name "*.ko" -print
DST/lib/modules/4.19.0-12-amd64/kernel/fs/ext4/ext4.ko
DST/lib/modules/4.19.0-12-amd64/kernel/drivers/extcon/extcon-core.ko
DST/lib/modules/4.19.0-11-amd64/kernel/fs/ext4/ext4.ko
DST/lib/modules/4.19.0-11-amd64/kernel/drivers/extcon/extcon-core.ko
DST/lib/modules/4.19.0-10-amd64/kernel/fs/ext4/ext4.ko
DST/lib/modules/4.19.0-10-amd64/kernel/drivers/extcon/extcon-core.ko
root@terre:/mnt/memdisk#
OK, we got what we wanted
Same as previously, let's filter out kernel object files
root@terre:/mnt/memdisk# tar -cf backup.tar --exclude "*.ko" /lib
tar: Removing leading `/' from member names
root@terre:/mnt/memdisk# rm -rf DST
root@terre:/mnt/memdisk# mkdir DST
root@terre:/mnt/memdisk# cd DST
root@terre:/mnt/memdisk/DST# tar -xf ../backup.tar
root@terre:/mnt/memdisk/DST# find . -name "*.ko" -print
root@terre:/mnt/memdisk/DST#
Now, we want to keep only those kernel object files starting with ext
root@terre:/mnt/memdisk# tar -cf backup.tar "ext*" --exclude "*.ko" /lib
tar: ext*: Cannot stat: No such file or directory
tar: Removing leading `/' from member names
tar: Removing leading `/' from hard link targets
tar: Exiting with failure status due to previous errors
root@terre:/mnt/memdisk#
Well, arguments passed outside of an option do not seem to be expanded by tar, thus using a mask to include some pattern is not possible. It seems the only option is to use a file listing, which we will evaluate below.
We will use a tmpfs filesystem mounted twice thanks to mount's --bind option. The objective is to save either everything except a few given filesystems, or only what is inside one or a few given filesystems. Here is the preparation phase:
root@terre:/mnt/memdisk# mkdir SRC
root@terre:/mnt/memdisk# mkdir SRC/D1 SRC/D2 SRC/D3
root@terre:/mnt/memdisk# mount -t tmpfs tmpfs SRC/D1
root@terre:/mnt/memdisk# mount --bind SRC/D1 SRC/D2
root@terre:/mnt/memdisk# mount -t tmpfs tmpfs SRC/D3
root@terre:/mnt/memdisk# ls SRC/D1 SRC/D2
SRC/D1:
SRC/D2:
root@terre:/mnt/memdisk# echo "Hello World" > SRC/D1/file.txt
root@terre:/mnt/memdisk# ls SRC/D1 SRC/D2
SRC/D1:
file.txt
SRC/D2:
file.txt
root@terre:/mnt/memdisk# echo "give me your data, I'll tell your needs and what to buy" > SRC/gafam.com
root@terre:/mnt/memdisk# echo "sight" > SRC/D3/democracy.org
root@terre:/mnt/memdisk#
root@terre:/mnt/memdisk# dar -c backup -R SRC -MX:/mnt/memdisk/SRC/D1 -vs -vt -q
Adding folder to archive: /mnt/memdisk/SRC/D3
Adding file to archive: /mnt/memdisk/SRC/D3/democracy.org
Adding file to archive: /mnt/memdisk/SRC/gafam.com
Skipping file: /mnt/memdisk/SRC/D2
Skipping file: /mnt/memdisk/SRC/D1
root@terre:/mnt/memdisk#
We could exclude a filesystem, and its second appearance in D2 was also excluded,
without having to mention it. Let's include only D1
now:
root@terre:/mnt/memdisk# rm -f backup.1.dar
root@terre:/mnt/memdisk# dar -c backup -R SRC -MI:/mnt/memdisk/SRC/D1 -vs -vt -q
Skipping file: /mnt/memdisk/SRC/D3
Adding file to archive: /mnt/memdisk/SRC/gafam.com
Adding folder to archive: /mnt/memdisk/SRC/D2
Adding file to archive: /mnt/memdisk/SRC/D2/file.txt
Adding folder to archive: /mnt/memdisk/SRC/D1
Adding file to archive: /mnt/memdisk/SRC/D1/file.txt
root@terre:/mnt/memdisk#
OK, we got what we wanted
root@terre:/mnt/memdisk# rsync -arvHAXS --one-file-system SRC DST
sending incremental file list
created directory DST
SRC/
SRC/gafam.com
SRC/D1/
SRC/D2/
SRC/D3/
sent 283 bytes received 77 bytes 720.00 bytes/sec
total size is 56 speedup is 0.16
root@terre:/mnt/memdisk#
rsync has only one option about filesystems: it restricts recursion to the filesystem of the source directory. We cannot exclude specific filesystems (with this option they are all excluded), and we cannot include only specific filesystems (without this option, which is the default behavior, none is excluded).
root@terre:/mnt/memdisk# tar -cvf backup.tar --one-file-system SRC
SRC/
SRC/D3/
tar: SRC/D3/: file is on a different filesystem; not dumped
SRC/gafam.com
SRC/D2/
tar: SRC/D2/: file is on a different filesystem; not dumped
SRC/D1/
tar: SRC/D1/: file is on a different filesystem; not dumped
root@terre:/mnt/memdisk#
tar does not behave better than rsync on that topic
By tag we mean any mark the user can add to a file that will drive its fate when the backup is done. The most common is the dump flag, but it is not always available; using some other mechanism (Extended Attributes, ...) can be an interesting alternative.
root@terre:/var/tmp# mkdir SRC
root@terre:/var/tmp# echo "Hello" > file1.txt
root@terre:/var/tmp# echo "World" > file2.txt
root@terre:/var/tmp# chattr +d file1.txt
root@terre:/var/tmp# setfattr -n user.no_dump file2.txt
root@terre:/var/tmp# mv file1.txt file2.txt SRC
root@terre:/var/tmp# dar -c backup -w -R SRC --nodump -vt -q
Adding file to archive: /var/tmp/SRC/file2.txt
Saving Extended Attributes for /var/tmp/SRC/file2.txt
Saving Filesystem Specific Attributes for /var/tmp/SRC/file2.txt
root@terre:/var/tmp# dar -c backup -w -R SRC --exclude-by-ea=user.no_dump -vt -q
Adding file to archive: /var/tmp/SRC/file1.txt
Saving Filesystem Specific Attributes for /var/tmp/SRC/file1.txt
root@terre:/var/tmp#
We have two mechanisms, one based on the dump flag and one based on an arbitrary extended attribute. However dar only supports exclusion of files, not inclusion for backup, based on a tag.
rsync does not seem to be able to filter based on an arbitrary mark
rsync does not seem to be able to filter based on an arbitrary mark
We build a file listing and expect to either have only those files saved or have them excluded from the performed backup. Here is the listing preparation:
root@terre:/mnt/memdisk# find /lib -name "*.ko" -o -print > include.txt
root@terre:/mnt/memdisk# wc -l include.txt
4123 include.txt
root@terre:/mnt/memdisk# find /lib -name "*.ko" -print > exclude.txt
root@terre:/mnt/memdisk# wc -l exclude.txt
10677 exclude.txt
root@terre:/mnt/memdisk#
root@terre:/mnt/memdisk# dar -c backup -R /lib -[ include.txt
--------------------------------------------
4122 inode(s) saved
including 0 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
10677 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 14799
--------------------------------------------
EA saved for 0 inode(s)
FSA saved for 3945 inode(s)
--------------------------------------------
root@terre:/mnt/memdisk#
file inclusion is available, let's see file exclusion:
root@terre:/mnt/memdisk# dar -c backup -R /lib -] exclude.txt
--------------------------------------------
4122 inode(s) saved
including 0 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
10677 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 14799
--------------------------------------------
EA saved for 0 inode(s)
FSA saved for 3945 inode(s)
--------------------------------------------
root@terre:/mnt/memdisk#
root@terre:/mnt/memdisk# rsync -aHAXS --files-from=include.txt / DST
root@terre:/mnt/memdisk# find DST -print | wc -l
4124
root@terre:/mnt/memdisk# find DST -name "*.ko" -print
root@terre:/mnt/memdisk#
File inclusion is available. However, while we can exclude a list of patterns defined in a file, we cannot directly exclude a list of files. Given the filtering syntax of rsync, we should prepend "- " to each entry:
root@terre:/mnt/memdisk# sed -r 's/^/- /' exclude.txt > rsync-exclude.txt
root@terre:/mnt/memdisk# rm -rf DST
root@terre:/mnt/memdisk# rsync -aHAXS --exclude-from=rsync-exclude.txt /lib DST
root@terre:/mnt/memdisk# find DST -print | wc -l
4124
root@terre:/mnt/memdisk# find DST -name "*.ko" -print | wc -l
0
root@terre:/mnt/memdisk#
So we are good with this sed additional listing adaptation.
root@terre:/mnt/memdisk# tar -cvf backup.tar --files-from=include.txt | wc -l
tar: Removing leading `/' from member names
98017
root@terre:/mnt/memdisk# tar -tf backup.tar | grep .ko | wc -l
73392
root@terre:/mnt/memdisk# grep .ko include.txt | wc -l
3
root@terre:/mnt/memdisk# grep .ko include.txt
/lib/modules/4.19.0-12-amd64/kernel/sound/pci/korg1212
/lib/modules/4.19.0-11-amd64/kernel/sound/pci/korg1212
/lib/modules/4.19.0-10-amd64/kernel/sound/pci/korg1212
root@terre:/mnt/memdisk#
The include.txt file does not contain any file with the ko extension, however tar
saved all of them. Reading back the man page concerning this --files-from option,
the statement "The names read are handled the same way as command line arguments" explains that:
in the listing where all "*.ko"
files have been removed, their parent
directories remain, which implies saving all their content. In consequence we must not list
directories, only their content (which will prevent us from saving empty directories as such).
Let's modify the include.txt file that way:
find /lib -type d -o -name "*.ko" -o -print > tar-include.txt
root@terre:/mnt/memdisk# tar -cvf backup.tar --files-from=tar-include.txt | wc -l
tar: Removing leading `/' from member names
1532
root@terre:/mnt/memdisk# tar -tvf backup.tar | grep "*.ko"
root@terre:/mnt/memdisk# wc -l tar-include.txt
1532 tar-include.txt
root@terre:/mnt/memdisk# wc -l include.txt
4123 include.txt
the difference between the 1532 entries saved by tar and the 4123 saved by rsync or dar comes from the many empty directories that cannot be saved as such by tar using this method.
root@terre:/mnt/memdisk# tar -cvf backup.tar --exclude-from=exclude.txt /lib | wc -l
tar: Removing leading `/' from member names
4123
root@terre:/mnt/memdisk# wc -l exclude.txt
10677 exclude.txt
root@terre:/mnt/memdisk# tar -tf backup.tar | egrep "\.ko$"
root@terre:/mnt/memdisk#
The file listing exclusion works as expected
For this test we will backup the content of /usr/bin of the running system. We select a slice size smaller than the biggest file under backup. The use case for slicing implies compression (remote storage, cloud storage, limited removable media storage...).
root@terre:/mnt/memdisk# ls -lh --sort=size /usr/bin | tac | tail
-rwxr-xr-x 1 root root 8.0M Dec 18 2018 luajittex
-rwxr-xr-x 1 root root 8.1M Dec 18 2018 luatex53
-rwxr-xr-x 1 root root 8.1M Dec 18 2018 luatex
-rwxr-xr-x 1 root root 8.2M May 27 2019 wireshark
-rwxr-xr-x 1 root root 12M Dec 21 2018 kstars
-rwxr-xr-x 1 root root 15M Mar 12 2018 doxygen
-rwxr-xr-x 1 root root 16M Jan 4 2019 stellarium
-rwxr-xr-x 1 root root 19M Oct 12 19:46 mysql_embedded
-rwxr-xr-x 1 root root 39M Sep 5 2019 emacs-gtk
total 430M
root@terre:/mnt/memdisk#
terre:/mnt/memdisk# dar -c backup -R /usr/bin -z6 -s 20M -q
terre:/mnt/memdisk# ls -lh backup.*
-rw-r--r-- 1 root root 20M Nov 13 11:30 backup.1.dar
-rw-r--r-- 1 root root 20M Nov 13 11:30 backup.2.dar
-rw-r--r-- 1 root root 20M Nov 13 11:30 backup.3.dar
-rw-r--r-- 1 root root 20M Nov 13 11:30 backup.4.dar
-rw-r--r-- 1 root root 20M Nov 13 11:30 backup.5.dar
-rw-r--r-- 1 root root 20M Nov 13 11:30 backup.6.dar
-rw-r--r-- 1 root root 20M Nov 13 11:30 backup.7.dar
-rw-r--r-- 1 root root 7.7M Nov 13 11:30 backup.8.dar
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# dar -x backup -R DST -g emacs-gtk -E "echo openning slice %p/%b.%N.%e"
openning slice /mnt/memdisk/backup.8.dar
openning slice /mnt/memdisk/backup.4.dar
openning slice /mnt/memdisk/backup.5.dar
Restoration of FSA for /mnt/memdisk/DST/emacs-gtk aborted: Failed reading existing extX family FSA: Inappropriate ioctl for device
Restoration of linux immutable FSA for /mnt/memdisk/DST/emacs-gtk aborted: Failed reading existing extX family FSA: Inappropriate ioctl for device
--------------------------------------------
1 inode(s) restored
including 0 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
2591 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 2592
--------------------------------------------
EA restored for 0 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk# diff DST/emacs-gtk /usr/bin/emacs-gtk
terre:/mnt/memdisk# echo $?
0
terre:/mnt/memdisk#
We can also specify a different size for the first slice when using dar, this was used in the past to fulfill a disk partially filled by a previous incremental backup when saving onto CD-RW and DVD-RW, but that may still make sense when using USB keys or any other removable media.
root@terre:/mnt/memdisk# dar -c backup -R /usr/bin -s 20M -S 1M -q --min-digit 3
root@terre:/mnt/memdisk# ls -lh
total 361M
-rw-r--r-- 1 root root 1.0M Nov 6 18:57 backup.001.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.002.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.003.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.004.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.005.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.006.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.007.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.008.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.009.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.010.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.011.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.012.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.013.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.014.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.015.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.016.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.017.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.018.dar
-rw-r--r-- 1 root root 20M Nov 6 18:57 backup.019.dar
root@terre:/mnt/memdisk# dar -c backup -R /usr/bin -s 20M -S 200M -q --min-digit 3
root@terre:/mnt/memdisk# ls -lh
total 361M
-rw-r--r-- 1 root root 200M Nov 6 18:58 backup.001.dar
-rw-r--r-- 1 root root 20M Nov 6 18:59 backup.002.dar
-rw-r--r-- 1 root root 20M Nov 6 18:59 backup.003.dar
-rw-r--r-- 1 root root 20M Nov 6 18:59 backup.004.dar
-rw-r--r-- 1 root root 20M Nov 6 18:59 backup.005.dar
-rw-r--r-- 1 root root 20M Nov 6 18:59 backup.006.dar
-rw-r--r-- 1 root root 20M Nov 6 18:59 backup.007.dar
-rw-r--r-- 1 root root 20M Nov 6 18:59 backup.008.dar
-rw-r--r-- 1 root root 20M Nov 6 18:59 backup.009.dar
-rw-r--r-- 1 root root 913K Nov 6 18:59 backup.010.dar
root@terre:/mnt/memdisk#
rsync cannot split any file in slices, and it does not generate any backup: it only copies files. You thus cannot split data into slices to fit a particular restricted storage space.
root@terre:/mnt/memdisk# tar -czf backup.tar -M -L 20480 /usr/bin
tar: Cannot use multi-volume compressed archives
Try 'tar --help' or 'tar --usage' for more information.
root@terre:/mnt/memdisk#
As reported by tar above, although multi-volume support exists, it is quite restrictive as one cannot use compression at the same time.
terre:/mnt/memdisk# tar -cf backup -M -L 20480 /usr/bin
tar: Removing leading `/' from member names
Prepare volume #2 for 'backup' and hit return:
tar: Removing leading `/' from hard link targets
Prepare volume #3 for 'backup' and hit return:
Prepare volume #4 for 'backup' and hit return:
Prepare volume #5 for 'backup' and hit return:
Prepare volume #6 for 'backup' and hit return:
Prepare volume #7 for 'backup' and hit return:
Prepare volume #8 for 'backup' and hit return:
Prepare volume #9 for 'backup' and hit return:
Prepare volume #10 for 'backup' and hit return:
Prepare volume #11 for 'backup' and hit return:
Prepare volume #12 for 'backup' and hit return:
Prepare volume #13 for 'backup' and hit return:
Prepare volume #14 for 'backup' and hit return:
Prepare volume #15 for 'backup' and hit return:
Prepare volume #16 for 'backup' and hit return:
Prepare volume #17 for 'backup' and hit return:
Prepare volume #18 for 'backup' and hit return:
Prepare volume #19 for 'backup' and hit return:
Prepare volume #20 for 'backup' and hit return:
Prepare volume #21 for 'backup' and hit return:
terre:/mnt/memdisk#
terre:/mnt/memdisk# ls -l backup*
-rw-r--r-- 1 root root 19527680 Nov 13 11:40 backup
terre:/mnt/memdisk#
But even without compression, tar is still restrictive: it does not produce
different files, you have to move each new volume around yourself and hit return
each time.
Note also that without compression, the space required passes
from 8 volumes with dar to 21 volumes with tar.
The multi-volume support for tar seems well defined for local removable tape devices, but will cost more than twice as much tape as what you can do with dar, even if tape media is your only target. Here is an example with dar of how to write a multi-volume, compressed backup to tape and pause between each volume as tar does:
terre:/mnt/memdisk# dar -c backup -R /usr/bin -z6 -s 20M -E "echo writing volume %N to tape" -E "cat < %p/%b.%N.%e > /dev/mt" -p
writing volume 1 to tape
Finished writing to file 1, ready to continue ? [return = YES | Esc = NO]
Continuing...
writing volume 2 to tape
Finished writing to file 2, ready to continue ? [return = YES | Esc = NO]
Continuing...
writing volume 3 to tape
Finished writing to file 3, ready to continue ? [return = YES | Esc = NO]
Continuing...
writing volume 4 to tape
Finished writing to file 4, ready to continue ? [return = YES | Esc = NO]
Continuing...
writing volume 5 to tape
Finished writing to file 5, ready to continue ? [return = YES | Esc = NO]
Continuing...
writing volume 6 to tape
Finished writing to file 6, ready to continue ? [return = YES | Esc = NO]
Continuing...
writing volume 7 to tape
Finished writing to file 7, ready to continue ? [return = YES | Esc = NO]
Continuing...
writing volume 8 to tape
--------------------------------------------
2592 inode(s) saved
including 5 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
0 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 2592
--------------------------------------------
EA saved for 3 inode(s)
FSA saved for 2152 inode(s)
--------------------------------------------
terre:/mnt/memdisk#
Encryption targets a relatively long-term lifetime; having compression at the same time increases security, as it increases the "randomness" of the data to cipher. So we will use both in our tests (gzip with a compression level of 6).
A point to pay attention to concerns the way the password/passphrase can be provided. Putting this to the command-line could let other users on this same system read it. Having interactive prompt is better as well as having the password set in a read access restricted file, which in addition allows automation.
root@terre:/mnt/memdisk# dar -c backup -R / -g usr/bin -K aes256: -q -z6
Archive backup requires a password:
Please confirm your password:
root@terre:/mnt/memdisk# dar -l backup -q
Archive backup requires a password:
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
Archive version format : 11
Compression algorithm used : gzip
Compression block size used : 0
Symmetric key encryption used : AES 256
Asymmetric key encryption used : none
Archive is signed : no
Sequential reading marks : present
User comment : N/A
KDF iteration count : 10000
KDF hash algorithm : argon2
Salt size : 32 bytes
Catalogue size in archive : 101907 bytes
Archive is composed of 1 file(s)
File size: 155070897 bytes
The global data compression ratio is: 64%
CATALOGUE CONTENTS :
total number of inode : 2589
fully saved : 2589
binay delta patch : 0
inode metadata only : 0
distribution of inode(s)
- directories : 2
- plain files : 2152
- symbolic links : 435
- named pipes : 0
- unix sockets : 0
- character devices : 0
- block devices : 0
- Door entries : 0
hard links information
- number of inode with hard link : 5
- number of reference to hard linked inodes: 10
destroyed entries information
0 file(s) have been record as destroyed since backup of reference
root@terre:/mnt/memdisk# touch pass.dcf
root@terre:/mnt/memdisk# chmod go-rwx pass.dcf
root@terre:/mnt/memdisk# cat >> pass.dcf
-K "aes256:hello world!"
root@terre:/mnt/memdisk# ls -l pass.dcf
-rw------- 1 root root 25 Nov 9 11:37 pass.dcf
root@terre:/mnt/memdisk# rm backup.1.dar
rm: remove regular file 'backup.1.dar'? y
root@terre:/mnt/memdisk# dar -c backup -R / -g usr/bin -B pass.dcf -q -z6
root@terre:/mnt/memdisk# dar -l backup -q -B pass.dcf
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
Archive version format : 11
Compression algorithm used : gzip
Compression block size used : 0
Symmetric key encryption used : AES 256
Asymmetric key encryption used : none
Archive is signed : no
Sequential reading marks : present
User comment : N/A
KDF iteration count : 10000
KDF hash algorithm : argon2
Salt size : 32 bytes
Catalogue size in archive : 102310 bytes
Archive is composed of 1 file(s)
File size: 155132433 bytes
The global data compression ratio is: 64%
CATALOGUE CONTENTS :
total number of inode : 2589
fully saved : 2589
binay delta patch : 0
inode metadata only : 0
distribution of inode(s)
- directories : 2
- plain files : 2152
- symbolic links : 435
- named pipes : 0
- unix sockets : 0
- character devices : 0
- block devices : 0
- Door entries : 0
hard links information
- number of inode with hard link : 5
- number of reference to hard linked inodes: 10
destroyed entries information
0 file(s) have been record as destroyed since backup of reference
root@terre:/mnt/memdisk#
We can provide password either on command-line (not recommended), prompted by dar once launched or from a protected configuration file. In the following we add slicing to encryption to see whether or not dar deciphers the whole backup to recover a single file:
root@terre:/mnt/localdisk# rm -rf backup.*
root@terre:/mnt/localdisk# dar -c backup -R / -g usr/bin -K aes256: -s 1M -q -z6
Archive backup requires a password:
Please confirm your password:
root@terre:/mnt/localdisk# ls -l backup.* | wc -l
148
root@terre:/mnt/localdisk# dar -x backup -g usr/bin/emacs-gtk -E "echo openning slice %b.%N.%e" -q
openning slice backup.148.dar
Archive backup requires a password:
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
openning slice backup.1.dar
openning slice backup.80.dar
openning slice backup.81.dar
openning slice backup.82.dar
openning slice backup.83.dar
openning slice backup.84.dar
root@terre:/mnt/localdisk#
As seen above, dar does not need to decipher nor uncompress the whole backup to recover a single file. The use of slicing lets us see which slices it accessed, but the behavior is the same without slicing and can be measured by the execution time (see the performance tests logs).
rsync cannot cipher data, it can rely on ssh to cipher the data over the network but data is finally always stored in clear text.
There is no native support for ciphering with tar. You can however pipe tar's output to openssl to cipher the generated backup on fly as a whole.
root@terre:/mnt/memdisk# tar -czf - /usr/bin | openssl enc -e -aes256 -out backup.tar.gz.crypted
tar: Removing leading `/' from member names
enter aes-256-cbc encryption password:
Verifying - enter aes-256-cbc encryption password:
*** WARNING : deprecated key derivation used.
Using -iter or -pbkdf2 would be better.
tar: Removing leading `/' from hard link targets
root@terre:/mnt/memdisk# openssl enc -d -aes256 -in backup.tar.gz.crypted | tar -xz
enter aes-256-cbc decryption password:
*** WARNING : deprecated key derivation used.
Using -iter or -pbkdf2 would be better.
root@terre:/mnt/memdisk# tar -czf - /usr/bin | openssl enc -e -aes256 -out backup.tar.gz.crypted -pass file:pass.txt
tar: Removing leading `/' from member names
*** WARNING : deprecated key derivation used.
Using -iter or -pbkdf2 would be better.
tar: Removing leading `/' from hard link targets
root@terre:/mnt/memdisk# openssl enc -d -aes256 -in backup.tar.gz.crypted -pass file:pass.txt | tar -xz
*** WARNING : deprecated key derivation used.
Using -iter or -pbkdf2 would be better.
root@terre:/mnt/memdisk#
With openssl, tar has both the ability to provide the password/passphrase from an interactive prompt and from a protected file. However you will have to remember which algorithm you used in addition to the passphrase. The ciphering being done as a whole, you will have to decipher the whole backup even to just restore a single file. If the backup is large, this may take a long time and may require downloading a lot of data from a remote storage.
We see that ciphering with tar is possible at the cost of some complex command-line. But this is error-prone as we see the shown warning that the key derivation function is deprecated and we should switch to another one. Moreover you will have to remember which key derivation function and its parameters in addition to the passphrase you provided and in addition to the ciphering algorithm used.
Note: you can also use openssl with dar as we did for tar but it brings all the drawbacks we saw with tar
The objective is to create a backup ciphered using GnuPG public/private key pair, restore the whole backup and restore a single file from it. We will also use compression (gzip level 6) as it may make sense for the corresponding use cases (data exchange over Internet for example).
terre:/mnt/memdisk# dar -c backup -K gnupg::root@terre.systeme-solaire.espace -R SRC -z6 -q
terre:/mnt/memdisk# dar -l backup -q
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
Archive version format : 11
Compression algorithm used : gzip
Compression block size used : 0
Symmetric key encryption used : AES 256
Asymmetric key encryption used : gnupg
Archive is signed : no
Sequential reading marks : present
User comment : N/A
Catalogue size in archive : 68669 bytes
Archive is composed of 1 file(s)
File size: 158261425 bytes
The global data compression ratio is: 64%
CATALOGUE CONTENTS :
total number of inode : 2593
fully saved : 2593
binay delta patch : 0
inode metadata only : 0
distribution of inode(s)
- directories : 1
- plain files : 2157
- symbolic links : 435
- named pipes : 0
- unix sockets : 0
- character devices : 0
- block devices : 0
- Door entries : 0
hard links information
- number of inode with hard link : 0
- number of reference to hard linked inodes: 0
destroyed entries information
0 file(s) have been record as destroyed since backup of reference
terre:/mnt/memdisk# ls -l backup.1.dar
-rw-r--r-- 1 root root 158261425 Nov 9 16:04 backup.1.dar
terre:/mnt/memdisk#
As displayed in the backup header output above, the underlying encryption is a symmetric encryption (AES 256 by default), but the AES key is stored ciphered using the public key of the backup recipient whose email address is provided (or email addresses, if more than one recipient is expected). This key is randomly chosen by dar and stored ciphered in the archive header. Thus the overall behavior, performance and security of GnuPG within dar is equivalent to that of the symmetric algorithm chosen, with the ability to quickly restore some or all files from an archive, without first waiting for/downloading the whole backup in order to decipher it.
Seen above no password or passphrase is asked as the recipient email is ourselves (root@terre.systeme-solaire.espace). Let's cipher for another recipient:
terre:/mnt/memdisk# dar -c backup -K gnupg::dar.linux@free.fr -R SRC -z6 -q -w
terre:/mnt/memdisk# ls -l backup.1.dar
-rw-r--r-- 1 root root 158230913 Nov 9 16:22 backup.1.dar
terre:/mnt/memdisk# dar -l backup -q
FATAL error, aborting operation: Unexpected error reported by GPGME: No secret key
terre:/mnt/memdisk# dar -c backup -K gnupg::dar.linux@free.fr,root@terre.systeme-solaire.espace -R SRC -z6 -q -w
terre:/mnt/memdisk# dar -l backup -q
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
Archive version format : 11
Compression algorithm used : gzip
Compression block size used : 0
Symmetric key encryption used : AES 256
Asymmetric key encryption used : gnupg
Archive is signed : no
Sequential reading marks : present
User comment : N/A
Catalogue size in archive : 68624 bytes
Archive is composed of 1 file(s)
File size: 158252223 bytes
The global data compression ratio is: 64%
CATALOGUE CONTENTS :
total number of inode : 2593
fully saved : 2593
binay delta patch : 0
inode metadata only : 0
distribution of inode(s)
- directories : 1
- plain files : 2157
- symbolic links : 435
- named pipes : 0
- unix sockets : 0
- character devices : 0
- block devices : 0
- Door entries : 0
hard links information
- number of inode with hard link : 0
- number of reference to hard linked inodes: 0
destroyed entries information
0 file(s) have been record as destroyed since backup of reference
terre:/mnt/memdisk#
Here we saw that ciphering for a recipient other than ourselves does not allow us to read the resulting backup; however, we can define several recipients and, if we add ourselves, we can read the backup as well as our primary recipients.
Tar cannot handle asymmetrical encryption by itself: as for symmetrical encryption, we must use an external tool that performs the ciphering operation outside the backup.
terre:/mnt/memdisk# tar -czf - SRC | gpg --encrypt --recipient root@terre.systeme-solaire.espace --output backup.tar.gz.gpg
terre:/mnt/memdisk# ls -l backup.tar.gz.gpg
-rw-r--r-- 1 root root 155337814 Nov 9 16:45 backup.tar.gz.gpg
terre:/mnt/memdisk#
terre:/mnt/memdisk# gpg --decrypt backup.tar.gz.gpg | tar -xzf -
gpg: encrypted with 3072-bit RSA key, ID 97E13D38B007DF30, created 2020-08-08
"root@terre <root@terre.systeme-solaire.espace>"
terre:/mnt/memdisk#
terre:/mnt/memdisk# tar -czf - SRC | gpg --encrypt --recipient dar.linux@free.fr --output backup.tar.gz.gpg
terre:/mnt/memdisk# gpg --decrypt backup.tar.gz.gpg | tar -xzf -
gpg: encrypted with 4096-bit RSA key, ID DB0A2141A4D96ECA, created 2012-09-13
"Denis Corbin (http://dar.linux.free.fr/) <dar.linux@free.fr>"
gpg: decryption failed: No secret key
gzip: stdin: unexpected end of file
tar: Child returned status 1
tar: Error is not recoverable: exiting now
terre:/mnt/memdisk#
terre:/mnt/memdisk# tar -czf - SRC | gpg --encrypt --recipient dar.linux@free.fr \
--recipient root@terre.systeme-solaire.espace --output backup.tar.gz.gpg
terre:/mnt/memdisk# gpg --decrypt backup.tar.gz.gpg | tar -xzf -
gpg: encrypted with 4096-bit RSA key, ID DB0A2141A4D96ECA, created 2012-09-13
"Denis Corbin (http://dar.linux.free.fr/) <dar.linux@free.fr>"
gpg: encrypted with 3072-bit RSA key, ID 97E13D38B007DF30, created 2020-08-08
"root@terre <root@terre.systeme-solaire.espace>"
terre:/mnt/memdisk#
As with symmetric encryption, the fact that the whole backup is ciphered at once means the whole backup must be downloaded back even to recover just one file.
devuan:/mnt/memdisk# time dar -c backup -K "aes256:hello world!" -at -1 0 -R SRC -q -w
9.782u 3.413s 0:06.28 210.0% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# la backup.1.dar
-rw-r--r-- 1 root root 1572706497 Nov 9 14:50 backup.1.dar
devuan:/mnt/memdisk# time dar -c backup -K "aes256:hello world!" -at -1 0 -R SRC -q -w
9.173u 2.845s 0:05.50 218.3% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# la backup.1.dar
-rw-r--r-- 1 root root 1572655217 Nov 9 14:50 backup.1.dar
devuan:/mnt/memdisk#
When ciphering the same data several times (with symmetric or asymmetric encryption), the resulting backup size changes each time. This is due to the garbage (the elastic buffer) dar adds at the beginning and at the end of the data to cipher. This way, even if a dar backup has well-known structures, it is not easy to know precisely where they are positioned in the backup file, which makes a plain-text attack much more difficult to succeed, if even possible in a reasonable time.
rsync does not provide any way to cipher the backup, it is thus not concerned by protecting against plain-text attack.
devuan:/mnt/memdisk/SRC# time ../tar.backup ../backup.tar.crypted usr
4.112u 2.343s 0:04.72 136.6% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/SRC# ls -l ../bac
backup.1.dar backup.tar.crypted
devuan:/mnt/memdisk/SRC# ls -l ../backup.tar.crypted
-rw-r--r-- 1 root root 1603594272 Nov 9 14:56 ../backup.tar.crypted
devuan:/mnt/memdisk/SRC# time ../tar.backup ../backup.tar.crypted usr
3.952u 2.564s 0:04.79 135.9% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/SRC# ls -l ../backup.tar.crypted
-rw-r--r-- 1 root root 1603594272 Nov 9 14:56 ../backup.tar.crypted
devuan:/mnt/memdisk/SRC#
tar by itself does not provide any ciphering mechanism; however, you can cipher the tar-generated backups with an external tool (for example openssl for symmetric encryption or gpg for asymmetric encryption). However, none of these mechanisms protects against plain-text attack: tar backups have somewhat predictable header contents.
dar uses argon2
by default, with 10,000 iterations. It can
also use pkcs5 v2 (pbkdf2) with md5, sha1 or sha512 algorithm. The user
is able to set the KDF function and iteration count, so we are able to measure
the execution time variation added by the iteration count (taking into account that the data to cipher also
changes depending on the amount of random garbage dar wraps it with):
terre:/mnt/memdisk# time dar -c backup -R SRC -K aes:hello --kdf-param 100k:sha1 -w -q
4.904u 0.572s 0:05.49 99.6% 0+0k 0+0io 0pf+0w
terre:/mnt/memdisk# time dar -c backup -R SRC -K aes:hello --kdf-param 500k:sha1 -w -q
5.805u 0.272s 0:06.08 99.8% 0+0k 0+0io 0pf+0w
terre:/mnt/memdisk# time dar -c backup -R SRC -K aes:hello --kdf-param 1M:sha1 -w -q
6.852u 0.308s 0:07.18 99.5% 0+0k 0+0io 0pf+0w
terre:/mnt/memdisk# time dar -c backup -R SRC -K aes:hello --kdf-param 10k:argon2 -w -q
5.092u 0.870s 0:03.50 170.2% 0+0k 0+0io 0pf+0w
terre:/mnt/memdisk# time dar -c backup -R SRC -K aes:hello --kdf-param 10k:argon2 -w -q
5.232u 0.760s 0:03.54 169.2% 0+0k 0+0io 0pf+0w
terre:/mnt/memdisk# time dar -c backup -R SRC -K aes:hello --kdf-param 20k:argon2 -w -q
5.778u 0.822s 0:04.14 159.1% 0+0k 0+0io 0pf+0w
terre:/mnt/memdisk# time dar -c backup -R SRC -K aes:hello --kdf-param 100k:argon2 -w -q
10.613u 0.831s 0:09.00 127.1% 0+0k 0+0io 0pf+0w
terre:/mnt/memdisk# time dar -c backup -R SRC -K aes:hello --kdf-param 1M:argon2 -w -q
66.862u 0.666s 1:05.14 103.6% 0+0k 0+0io 0pf+0w
terre:/mnt/memdisk#
rsync does not provide any way to cipher the backup, it is not concerned by KDF.
As of today (year 2020) openssl only supports PBKDF2: no support for argon2 is available. Argon2 was the winner of the Password Hashing Competition in July 2015. PBKDF2 was published by the IETF in September 2000 as RFC 2898.
In order to stress each backup software on that aspect, we will use an ugly
script always_change that loops forever, permanently
invoking touch
on a given file. For the test, we create a source
tree to backup, containing a file of 10 MiB on which we will
apply this script:
terre:/mnt/memdisk# mkdir SRC
terre:/mnt/memdisk# dd if=/dev/zero of=SRC/hello_world bs=10240 count=1024
1024+0 records in
1024+0 records out
10485760 bytes (10 MB, 10 MiB) copied, 0.0107294 s, 977 MB/s
terre:/mnt/memdisk# ./always_change SRC/hello_world &
[1] 7433
terre:/mnt/memdisk# stat SRC/hello_world
File: SRC/hello_world
Size: 10485760 Blocks: 20480 IO Block: 4096 regular file
Device: 1bh/27d Inode: 375588 Links: 1
Access: (0644/-rw-r--r--) Uid: ( 0/ root) Gid: ( 0/ root)
Access: 2020-11-10 16:34:13.806106695 +0100
Modify: 2020-11-10 16:34:13.806106695 +0100
Change: 2020-11-10 16:34:13.806106695 +0100
Birth: -
terre:/mnt/memdisk# stat SRC/hello_world
File: SRC/hello_world
Size: 10485760 Blocks: 20480 IO Block: 4096 regular file
Device: 1bh/27d Inode: 375588 Links: 1
Access: (0644/-rw-r--r--) Uid: ( 0/ root) Gid: ( 0/ root)
Access: 2020-11-10 16:34:14.838104981 +0100
Modify: 2020-11-10 16:34:14.838104981 +0100
Change: 2020-11-10 16:34:14.838104981 +0100
Birth: -
terre:/mnt/memdisk# jobs
[1] + Running ./always_change SRC/hello_world
terre:/mnt/memdisk# ls -l SRC
total 10240
-rw-r--r-- 1 root root 10485760 Nov 10 16:34 hello_world
terre:/mnt/memdisk#
terre:/mnt/memdisk# dar -c backup -R SRC -q
WARNING! File modified while reading it for backup, but no more retry allowed: /mnt/memdisk/SRC/hello_world
terre:/mnt/memdisk# dar -l backup
[Data ][D][ EA ][FSA][Compr][S]| Permission | User | Group | Size | Date | filename
--------------------------------+------------+-------+-------+---------+-------------------------------+------------
[DIRTY ][ ] [---][ 99%][X] -rw-r--r-- 0 0 10 Mio Tue Nov 10 16:34:55 2020 hello_world
terre:/mnt/memdisk# dar -x backup -R DST
File /mnt/memdisk/DST/hello_world has changed during backup and is probably not saved in a valid state ("dirty file"),
do you want to consider it for restoration anyway? [return = YES | Esc = NO]
Continuing...
--------------------------------------------
1 inode(s) restored
including 0 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
0 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 1
--------------------------------------------
EA restored for 0 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk#
DIRTY
terre:/mnt/memdisk# rsync -arvHAXqz --delete SRC DST
terre:/mnt/memdisk#
rsync does not show anything nor behave differently (no retry, no change notification).
terre:/mnt/memdisk# tar -cf backup.tar SRC
tar: SRC/hello_world: file changed as we read it
terre:/mnt/memdisk# tar -tvf backup.tar
drwxr-xr-x root/root 0 2020-11-10 16:33 SRC/
-rw-r--r-- root/root 10485760 2020-11-10 16:41 SRC/hello_world
terre:/mnt/memdisk# rm -rf DST
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# cd DST
terre:/mnt/memdisk/DST# tar -xf ../backup.tar
terre:/mnt/memdisk/DST# ls -l SRC
total 10240
-rw-r--r-- 1 root root 10485760 Nov 10 16:41 hello_world
terre:/mnt/memdisk/DST#
For this test we make a full backup of a Linux source tree, then rename the Documentation directory as doc and make a differential backup of the whole. Renaming files is expected to do at worst the same as removing some and adding new ones; we should not see all data saved again:
devuan:/mnt/memdisk# du -B1 -s SRC
1121144832 SRC
devuan:/mnt/memdisk# dar -c full -R SRC -z6 -q
devuan:/mnt/memdisk# cd SRC/linux-5.9.2/
devuan:/mnt/memdisk/SRC/linux-5.9.2# mv Documentation/ doc
devuan:/mnt/memdisk/SRC/linux-5.9.2# cd ../..
devuan:/mnt/memdisk# dar -c diff -A full -R SRC -z6 -q
devuan:/mnt/memdisk# ls -l *.dar
-rw-r--r-- 1 root root 17858927 Nov 1 18:18 diff.1.dar
-rw-r--r-- 1 root root 219047658 Nov 1 18:14 full.1.dar
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# dar -x full -R DST -q
devuan:/mnt/memdisk# dar -x diff -R DST -q -w
devuan:/mnt/memdisk# diff -r SRC DST && echo "same data" || echo "different data"
same data
devuan:/mnt/memdisk#
We can see that restoring the full backup and then the differential backup over it leads to the exact same directory tree as the saved source files.
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# rsync -arHAXz --delete --info=stats SRC/* BACKUP
sent 214,380,105 bytes received 1,359,591 bytes 9,180,412.60 bytes/sec
total size is 954,869,250 speedup is 4.43
devuan:/mnt/memdisk# cd SRC/linux-5.9.2/
devuan:/mnt/memdisk/SRC/linux-5.9.2# mv Documentation/ doc
devuan:/mnt/memdisk/SRC/linux-5.9.2# cd ../..
devuan:/mnt/memdisk# rsync -arHAXz --delete --info=stats SRC/* BACKUP
sent 12,923,292 bytes received 680,190 bytes 3,886,709.14 bytes/sec
total size is 954,869,250 speedup is 70.19
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# rsync -arHAXz --delete --info=stats BACKUP/* DST
sent 214,371,610 bytes received 1,359,603 bytes 9,180,051.62 bytes/sec
total size is 954,869,250 speedup is 4.43
devuan:/mnt/memdisk#
We see that after the modification the amount of data pushed to the backup by rsync drops from 214 MiB to only 12 MiB. We can consider this as a differential backup, thus this part of the multi-level backup aspect is addressed, but we have lost access to the first backup: it has been overwritten by the new one, so we lose history, but that's a different feature.
devuan:/mnt/memdisk# tar --listed-incremental=snapshot.file -czf full.tar.gz SRC
devuan:/mnt/memdisk# cd SRC/linux-5.9.2/
devuan:/mnt/memdisk/SRC/linux-5.9.2# mv Documentation/ doc
devuan:/mnt/memdisk/SRC/linux-5.9.2# cd ../..
devuan:/mnt/memdisk# tar --listed-incremental=snapshot.file -czf diff.tar.gz SRC
devuan:/mnt/memdisk# ls -l
total 190488
drwxr-xr-x 3 root root 60 Oct 31 19:37 SRC
-rw-r--r-- 1 root root 9654445 Oct 31 19:49 diff.tar.gz
-rw-r--r-- 1 root root 184036391 Oct 31 19:49 full.tar.gz
-rw-r--r-- 1 root root 1361962 Oct 31 19:49 snapshot.file
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# cd DST
devuan:/mnt/memdisk/DST# tar --listed-incremental=/dev/null -xf ../full.tar.gz
devuan:/mnt/memdisk/DST# tar --listed-incremental=/dev/null -xf ../diff.tar.gz
devuan:/mnt/memdisk/DST# cd ..
devuan:/mnt/memdisk# diff -r SRC DST/SRC && echo "same data" || echo "different data"
same data
devuan:/mnt/memdisk#
Here too, we got the exact same directory as original and modified data
To evaluate the support for binary delta, we will make a first backup of a Debian ISO image, of which we will modify one bit using the bitflip script, then make a differential backup of it. We expect to see the differential backup not resaving the whole file, and yet the restoration of the full and differential backups matching the modified file.
devuan:/mnt/memdisk# dar -c full -z6 -R SRC --delta sig -q
devuan:/mnt/memdisk# ./bitflip 100000 SRC/debian-10.6.0-amd64-DVD-2.iso
devuan:/mnt/memdisk# dar -c diff -A full -z6 -R SRC -q
devuan:/mnt/memdisk# ls -l *.dar
-rw-r--r-- 1 root root 643 Nov 1 19:45 diff.1.dar
-rw-r--r-- 1 root root 4704429776 Nov 1 19:05 full.1.dar
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# dar -x full -R DST -q
devuan:/mnt/memdisk# dar -x diff -R DST -q
devuan:/mnt/memdisk# diff -s SRC/debian-10.6.0-amd64-DVD-2.iso DST/debian-10.6.0-amd64-DVD-2.iso
Files SRC/debian-10.6.0-amd64-DVD-2.iso and DST/debian-10.6.0-amd64-DVD-2.iso are identical
devuan:/mnt/memdisk#
For dar the backup we used:
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# rsync -arHAX --info=stats SRC/* DST
sent 4,688,066,109 bytes received 35 bytes 284,125,220.85 bytes/sec
total size is 4,686,921,728 speedup is 1.00
devuan:/mnt/memdisk# ./bitflip 100000 SRC/debian-10.6.0-amd64-DVD-2.iso
devuan:/mnt/memdisk# rsync -arHAX --info=stats SRC/* DST
sent 4,688,066,109 bytes received 35 bytes 302,455,880.26 bytes/sec
total size is 4,686,921,728 speedup is 1.00
devuan:/mnt/memdisk# rsync -arHAXt --info=stats --no-whole-file SRC/* DST
sent 342,469 bytes received 547,803 bytes 30,178.71 bytes/sec
total size is 4,686,921,728 speedup is 5,264.60
devuan:/mnt/memdisk# diff -s SRC/debian-10.6.0-amd64-DVD-2.iso DST/debian-10.6.0-amd64-DVD-2.iso
Files SRC/debian-10.6.0-amd64-DVD-2.iso and DST/debian-10.6.0-amd64-DVD-2.iso are identical
devuan:/mnt/memdisk#
We had to use --no-whole-file
to see the binary delta in action with
rsync. This feature is not activated when copying on local disk as it
does not make sense (for rsync) because the computation needed for the
binary delta takes more time than the byte to byte copy and because rsync does
not store just the delta (no backup history) but modifies the existing backup.
Anyway, binary delta is supported (of course!) by rsync.
devuan:/mnt/memdisk# tar --listed-incremental=snapshot.file -czf full.tar.gz SRC
devuan:/mnt/memdisk# ./bitflip 100000 SRC/debian-10.6.0-amd64-DVD-2.iso
devuan:/mnt/memdisk# tar --listed-incremental=snapshot.file -czf diff.tar.gz SRC
devuan:/mnt/memdisk# ls -l
total 9133304
drwxr-xr-x 2 root root 40 Oct 31 17:31 SRC
-rwxr--r-- 1 root root 460 Oct 31 16:34 bitflip
-rw-r--r-- 1 root root 4676243904 Oct 31 17:28 diff.tar.gz
-rw-r--r-- 1 root root 4676244172 Oct 31 17:24 full.tar.gz
-rw-r--r-- 1 root root 107 Oct 31 17:28 snapshot.file
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# cd DST
devuan:/mnt/memdisk/DST# tar --listed-incremental=/dev/null -xf ../full.tar.gz
devuan:/mnt/memdisk/DST# tar --listed-incremental=/dev/null -xf ../diff.tar.gz
devuan:/mnt/memdisk/DST# diff -s ../SRC/debian-10.6.0-amd64-DVD-2.iso SRC/debian-10.6.0-amd64-DVD-2.iso
Files ../SRC/debian-10.6.0-amd64-DVD-2.iso and SRC/debian-10.6.0-amd64-DVD-2.iso are identical
devuan:/mnt/memdisk/DST#
For tar the backup used:
Binary delta is not supported by tar
For this test we will use the hide_change script that relies on the bitflip script seen above and tries to hide the modifications performed, as a virus, keylogger or rootkit would tend to do. We will make a full backup before the modification and a differential backup after, then observe the behavior.
Here follows the script in action: we see no change using ls -l
while stat
shows the exact same information except for the Change (ctime) field:
terre:/mnt/memdisk# mkdir SRC
terre:/mnt/memdisk# echo "Hello World!" > SRC/file.txt
terre:/mnt/memdisk# cat SRC/file.txt
Hello World!
terre:/mnt/memdisk# ls -l SRC/file.txt
-rw-r--r-- 1 root root 13 Nov 12 13:13 SRC/file.txt
terre:/mnt/memdisk# stat SRC/file.txt
File: SRC/file.txt
Size: 13 Blocks: 8 IO Block: 4096 regular file
Device: 1bh/27d Inode: 424690 Links: 1
Access: (0644/-rw-r--r--) Uid: ( 0/ root) Gid: ( 0/ root)
Access: 2020-11-12 13:13:19.021978762 +0100
Modify: 2020-11-12 13:13:09.213998852 +0100
Change: 2020-11-12 13:13:09.213998852 +0100
Birth: -
terre:/mnt/memdisk# ./hide_change SRC/file.txt
terre:/mnt/memdisk# ls -l SRC/file.txt
-rw-r--r-- 1 root root 13 Nov 12 13:13 SRC/file.txt
terre:/mnt/memdisk# stat SRC/file.txt
File: SRC/file.txt
Size: 13 Blocks: 8 IO Block: 4096 regular file
Device: 1bh/27d Inode: 424690 Links: 1
Access: (0644/-rw-r--r--) Uid: ( 0/ root) Gid: ( 0/ root)
Access: 2020-11-12 13:13:19.021978762 +0100
Modify: 2020-11-12 13:13:09.213998852 +0100
Change: 2020-11-12 13:13:39.549936636 +0100
Birth: -
terre:/mnt/memdisk# cat SRC/file.txt
Lello World!
terre:/mnt/memdisk#
terre:/mnt/memdisk# mkdir SRC
terre:/mnt/memdisk# echo "Hello World!" > SRC/file.txt
terre:/mnt/memdisk# dar -c full -R SRC -N
--------------------------------------------
1 inode(s) saved
including 0 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
0 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 1
--------------------------------------------
EA saved for 0 inode(s)
FSA saved for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk# ./hide_change SRC/file.txt
terre:/mnt/memdisk# dar -c diff -A full -R SRC -N -q
SECURITY WARNING! SUSPICIOUS FILE /mnt/memdisk/SRC/file.txt: ctime changed since archive of reference was done, while no other inode information changed
terre:/mnt/memdisk#
dar issues a warning because of this suspicious condition. Note that we still have the sane file in the full backup, in case of doubt, we can compare it with this modified version:
terre:/mnt/memdisk# dar -d full -R SRC -q
DIFF /mnt/memdisk/SRC/file.txt: different file data, offset of first difference is: 0
Some file comparisons failed
terre:/mnt/memdisk#
The previous test reports that the first byte to have changed is at offset 0, thus this is not just a metadata change that led to this warning. We can, if necessary, restore the sane data from the full backup.
terre:/mnt/memdisk# rm -rf SRC
terre:/mnt/memdisk# mkdir SRC
terre:/mnt/memdisk# echo "Hello World!" > SRC/file.txt
terre:/mnt/memdisk# rsync -arvHAX SRC DST
sending incremental file list
created directory DST
SRC/
SRC/file.txt
sent 146 bytes received 65 bytes 422.00 bytes/sec
total size is 13 speedup is 0.06
terre:/mnt/memdisk# ./hide_change SRC/file.txt
terre:/mnt/memdisk# rsync -arvHAX SRC DST
sending incremental file list
sent 83 bytes received 13 bytes 192.00 bytes/sec
total size is 13 speedup is 0.14
terre:/mnt/memdisk# cat SRC/file.txt
Lello World!
terre:/mnt/memdisk# cat DST/SRC/file.txt
Hello World!
terre:/mnt/memdisk#
rsync has not reported the problem, but fortunately it has not synchronized the backup, thus we end up with a sane version in the DST backup directory. However, as the user is not made aware of this potential risk, the virus/ransomware can spread silently.
terre:/mnt/memdisk# rm -rf SRC
terre:/mnt/memdisk# rm -rf DST
terre:/mnt/memdisk# mkdir SRC
terre:/mnt/memdisk# echo "Hello World!" > SRC/file.txt
terre:/mnt/memdisk# tar --listed-incremental=snapshot.file -cf full.tar SRC
terre:/mnt/memdisk# ./hide_change SRC/file.txt
terre:/mnt/memdisk# tar --listed-incremental=snapshot.file -cvf diff.tar SRC
SRC/
SRC/file.txt
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# cd DST
terre:/mnt/memdisk/DST# tar -xf ../full.tar
terre:/mnt/memdisk/DST# cat SRC/file.txt
Hello World!
terre:/mnt/memdisk/DST# tar -xf ../diff.tar
terre:/mnt/memdisk/DST# cat SRC/file.txt
Lello World!
terre:/mnt/memdisk/DST#
As seen above, tar does not see any problem, but the file has been resaved as a whole (while its last modification time was unchanged), which leads to corrupting the new backup with potentially harmful data. The good point is that you still have the full backup with the sane data. But at the next backup cycle, as you were not notified of the risk, you will lose it and keep only the corrupted version of this file.
terre:/mnt/memdisk# dar -c full -z6 -R /usr --on-fly-isolate snapshot
--------------------------------------------
267245 inode(s) saved
including 23 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
0 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 267245
--------------------------------------------
EA saved for 5 inode(s)
FSA saved for 237962 inode(s)
--------------------------------------------
Now performing on-fly isolation...
terre:/mnt/memdisk# ls -l *.dar
-rw-r--r-- 1 root root 4006060941 Nov 12 15:34 full.1.dar
-rw-r--r-- 1 root root 6662595 Nov 12 15:34 snapshot.1.dar
terre:/mnt/memdisk# dar -C recreated_snapshot -A full -z6 -q
terre:/mnt/memdisk# ls -al *.dar
-rw-r--r-- 1 root root 4006060941 Nov 12 15:34 full.1.dar
-rw-r--r-- 1 root root 7907094 Nov 12 16:33 recreated_snapshot.1.dar
-rw-r--r-- 1 root root 6662595 Nov 12 15:34 snapshot.1.dar
terre:/mnt/memdisk# dar -c diff -A snapshot -R /usr -z
--------------------------------------------
23 inode(s) saved
including 23 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
267222 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
0 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 267245
--------------------------------------------
EA saved for 0 inode(s)
FSA saved for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk#
terre:/mnt/memdisk# ls -lh *.dar
-rw-r--r-- 1 root root 25M Nov 12 16:37 diff.1.dar
-rw-r--r-- 1 root root 3.8G Nov 12 15:34 full.1.dar
-rw-r--r-- 1 root root 7.6M Nov 12 16:33 recreated_snapshot.1.dar
-rw-r--r-- 1 root root 6.4M Nov 12 15:34 snapshot.1.dar
terre:/mnt/memdisk# dar -c diff2 -A recreated_snapshot -R /usr -z
--------------------------------------------
23 inode(s) saved
including 23 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
267222 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
0 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 267245
--------------------------------------------
EA saved for 0 inode(s)
FSA saved for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk# dar -c snapshot_alone -A + -R /usr -z
--------------------------------------------
23 inode(s) saved
including 23 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
267222 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
0 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 267245
--------------------------------------------
EA saved for 0 inode(s)
FSA saved for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk# touch /usr/local/src
terre:/mnt/memdisk# dar -c faked_diff -A snapshot -R /usr --dry-run -q -vt
Adding folder to archive: /usr/local/src
Saving Filesystem Specific Attributes for /usr/local/src
terre:/mnt/memdisk# ls -l *.dar
-rw-r--r-- 1 root root 25537139 Nov 12 16:37 diff.1.dar
-rw-r--r-- 1 root root 25537139 Nov 12 16:39 diff2.1.dar
-rw-r--r-- 1 root root 4006060941 Nov 12 15:34 full.1.dar
-rw-r--r-- 1 root root 7907094 Nov 12 16:33 recreated_snapshot.1.dar
-rw-r--r-- 1 root root 6662595 Nov 12 15:34 snapshot.1.dar
-rw-r--r-- 1 root root 25537142 Nov 12 16:44 snapshot_alone.1.dar
terre:/mnt/memdisk#
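As seen above, a snapshot can be created:
- at the same time as the backup it describes (using --on-fly-isolate)
- afterwards, from an existing backup (using -C)
- on its own, directly from the filesystem (using -A +)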
root@terre:/mnt/memdisk# ls -l full.1.dar
-rw-r--r-- 1 root root 3895581703 Nov 29 21:53 full.1.dar
root@terre:/mnt/memdisk# dar -l full -q
FATAL error, aborting operation: Cannot open catalogue: unknown compression
root@terre:/mnt/memdisk# !bitflip
bitflip 31124653000 full.1.dar
root@terre:/mnt/memdisk# dar -l full -q
Archive version format : 11
Compression algorithm used : gzip
Compression block size used : 0
Symmetric key encryption used : none
Asymmetric key encryption used : none
Archive is signed : no
Sequential reading marks : present
User comment : N/A
Catalogue size in archive : 7799028 bytes
Archive is composed of 1 file(s)
File size: 3895581703 bytes
The global data compression ratio is: 51%
CATALOGUE CONTENTS :
total number of inode : 263480
fully saved : 263480
binay delta patch : 0
inode metadata only : 0
distribution of inode(s)
- directories : 18080
- plain files : 216142
- symbolic links : 29258
- named pipes : 0
- unix sockets : 0
- character devices : 0
- block devices : 0
- Door entries : 0
hard links information
- number of inode with hard link : 11
- number of reference to hard linked inodes: 34
destroyed entries information
0 file(s) have been record as destroyed since backup of reference
root@terre:/mnt/memdisk# bitflip 31124653000 full.1.dar
root@terre:/mnt/memdisk# dar -t full
Final memory cleanup...
FATAL error, aborting operation: Cannot open catalogue: unknown compression
root@terre:/mnt/memdisk# dar -t full -A snapshot
--------------------------------------------
263503 item(s) treated
0 item(s) with error
0 item(s) ignored (excluded by filters)
--------------------------------------------
Total number of items considered: 263503
--------------------------------------------
root@terre:/mnt/memdisk#
root@terre:/mnt/memdisk# dar -t full --sequential-read
A problem occurred while reading this archive contents: Cannot open catalogue: unknown compression
--------------------------------------------
263503 item(s) treated
0 item(s) with error
0 item(s) ignored (excluded by filters)
--------------------------------------------
Total number of items considered: 263503
--------------------------------------------
root@terre:/mnt/memdisk#
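Once created, a snapshot can be used:
- as the reference for a differential backup (-A option)
- to see what has changed on the filesystem since the snapshot was made (using the --dry-run
option)
- as a rescue copy of the catalogue when the one inside the backup is corrupted (-A option while reading the backup), the snapshot having been made with the -C option
maybe long after the backup was made
or using --on-fly-isolate
at the same time the backup was created.
Without a snapshot, a backup whose catalogue is corrupted can still be read in --sequential-read
mode, it will just not let dar remove files that
were removed since the reference backup was made (this does thus not concern full backups, as here).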
This feature is not supported by rsync.
tar can generate snapshot:
terre:/mnt/memdisk# tar --listed-incremental=snapshot.file -czf full.tar.gz /usr
tar: Removing leading `/' from member names
tar: Removing leading `/' from hard link targets
terre:/mnt/memdisk# ls -l snapshot.file
-rw-r--r-- 1 root root 6288644 Nov 12 15:12 snapshot.file
terre:/mnt/memdisk# cp snapshot.file snapshot.file.ref
tar --listed-incremental=snapshot.file -cvf /dev/null /usr
/usr/
/usr/bin/
/usr/games/
/usr/include/
/usr/include/X11/
/usr/include/X11/bitmaps/
/usr/include/arpa/
/usr/include/asm-generic/
/usr/include/attr/
/usr/include/c++/
/usr/include/c++/
[...]
/usr/share/zoneinfo/right/Canada/
/usr/share/zoneinfo/right/Chile/
/usr/share/zoneinfo/right/Etc/
/usr/share/zoneinfo/right/Europe/
/usr/share/zoneinfo/right/Indian/
/usr/share/zoneinfo/right/Mexico/
/usr/share/zoneinfo/right/Pacific/
/usr/share/zoneinfo/right/SystemV/
/usr/share/zoneinfo/right/US/
/usr/share/zsh/
/usr/share/zsh/site-functions/
/usr/share/zsh/vendor-completions/
/usr/src/
terre:/mnt/memdisk# ls -l sna
snapshot.file snapshot.file.ref
terre:/mnt/memdisk# ls -l snapshot.file*
-rw-r--r-- 1 root root 6288644 Nov 12 15:20 snapshot.file
-rw-r--r-- 1 root root 6288644 Nov 12 15:18 snapshot.file.ref
terre:/mnt/memdisk#
Although a snapshot can be used (and is in fact required) to make a differential backup, it cannot really be used to see the difference a current living filesystem has with a given snapshot. Worse, doing so modifies the snapshot, so you first have to make a copy to not screw up your backup process. Worse still, if an incremental backup fails and you have not created a copy of the snapshot, your snapshot being modified, you will most likely have to redo the whole backup process from the full backup to be sure not to miss backing up some modified files. The same applies if you lose the snapshot file by mistake.
terre:/mnt/memdisk# dar -c backup -R /usr -g usr/bin -z6 --hash sha1
--------------------------------------------
0 inode(s) saved
including 0 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
8 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 8
--------------------------------------------
EA saved for 0 inode(s)
FSA saved for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk# ls -l *.dar*
-rw-r--r-- 1 root root 171 Nov 12 17:22 backup.1.dar
-rw-r--r-- 1 root root 55 Nov 12 17:22 backup.1.dar.sha1
terre:/mnt/memdisk# sha1sum -c backup.1.dar.sha1
backup.1.dar: OK
terre:/mnt/memdisk#
not supported by rsync
not supported by tar
As an example (but there are many more things that can be done), we take the case of an automounted directory. Such a volume is mounted only when used; if it is not used, no mount point directory shows and, unless you know it exists, no backup of its content is performed. The idea is, when entering the parent directory during the backup process, to trigger the mount points so that the backup includes them.
terre:/mnt/memdisk# cat /etc/auto.mnt
Espace -defaults,relatime,acl,bg,rsize=8192,wsize=8192 nfs.systeme-solaire.espace:/mnt/Externe/Espace
Commun -defaults,relatime,acl,bg,rsize=8192,wsize=8192,ro nfs.systeme-solaire.espace:/mnt/Externe/Commun
Backup -defaults,relatime,acl,bg,rsize=8192,wsize=8192,ro nfs.systeme-solaire.espace:/mnt/Backup
terre:/mnt/memdisk# ls -l /mnt/Externe/
total 4
drwxr-xr-x 7 root root 4096 Jul 14 17:58 Espace
terre:/mnt/memdisk# dar -c backup -R / -g /mnt -q
terre:/mnt/memdisk# dar -l backup
[Data ][D][ EA ][FSA][Compr][S]| Permission | User | Group | Size | Date | filename
--------------------------------+------------+-------+-------+---------+-------------------------------+------------
[Saved][-] [-L-][ ][ ] drwxr-xr-x 0 0 0 Wed Oct 21 18:17:07 2020 mnt
[Saved][-] [-L-][ ][ ] drwxr-xr-x 1000 1002 0 Mon Nov 9 11:56:54 2020 mnt/localdisk
[Saved][-] [---][-----][ ] lrwxrwxrwx 0 0 0 Thu Aug 15 23:29:46 2019 mnt/Backup
[Saved][-] [---][ ][ ] drwxr-xr-x 0 0 0 Thu Nov 12 17:42:11 2020 mnt/Externe
[Saved][-][Saved][---][ ][ ] drwxr-xr-x 0 0 0 Tue Jul 14 17:58:57 2020 mnt/Externe/Espace
terre:/mnt/memdisk#
terre:/mnt/memdisk# rm backup.1.dar
terre:/mnt/memdisk# dar -c backup -R / -g mnt -q '-<' mnt '-=' 'file %p/Externe/Backup %p/Externe/Commun'
/mnt/Externe/Backup: directory
/mnt/Externe/Commun: directory
terre:/mnt/memdisk# dar -l backup
[Data ][D][ EA ][FSA][Compr][S]| Permission | User | Group | Size | Date | filename
--------------------------------+------------+-------+-------+---------+-------------------------------+------------
[Saved][-] [-L-][ ][ ] drwxr-xr-x 0 0 0 Wed Oct 21 18:17:07 2020 mnt
[Saved][-] [-L-][ ][ ] drwxr-xr-x 1000 1002 0 Mon Nov 9 11:56:54 2020 mnt/localdisk
[Saved][-] [---][-----][ ] lrwxrwxrwx 0 0 0 Thu Aug 15 23:29:46 2019 mnt/Backup
[Saved][-] [---][ ][ ] drwxr-xr-x 0 0 0 Thu Nov 12 18:01:41 2020 mnt/Externe
[Saved][-][Saved][---][ ][ ] drwxr-x--- 993 1002 0 Wed Nov 11 10:21:55 2015 mnt/Externe/Commun
[Saved][-][Saved][---][ ][ ] drwxr-xr-x 0 0 0 Sun Sep 13 12:22:24 2020 mnt/Externe/Backup
[Saved][-][Saved][---][ ][ ] drwxr-xr-x 0 0 0 Tue Jul 14 17:58:57 2020 mnt/Externe/Espace
terre:/mnt/memdisk# ls -l /mnt/Externe/
total 12
drwxr-xr-x 9 root root 4096 Sep 13 12:22 Backup
drwxr-x--- 4 commun maison 4096 Nov 11 2015 Commun
drwxr-xr-x 7 root root 4096 Jul 14 17:58 Espace
terre:/mnt/memdisk#
In the previous example we see that the /mnt/Externe directory is a mount point containing three auto-mounted
volumes: Espace
, Commun
and Backup
. At first only Espace
was mounted. Performing a backup without care will skip the two other directories.
In a second step, thanks to the -<
and -=
options, we instructed dar to
run the file
command on the two missing directories when entering /mnt
.
As a result, we now see both of them in the backup. We could do that before executing the backup, but as
the backup may include many other
directories the time between such operation done before starting the backup and the time the backup finally
saves the automount point at /mnt/Externe
may exceed the automount timeout leading them to be
unmounted and disappear before the backup process reaches them.
terre:/mnt/memdisk# dar -c backup -R / -g usr/bin --hash sha512 -s 100M -q
terre:/mnt/memdisk# ls -l backup.*
-rw-r--r-- 1 root root 104857600 Nov 12 18:30 backup.1.dar
-rw-r--r-- 1 root root 143 Nov 12 18:30 backup.1.dar.sha512
-rw-r--r-- 1 root root 104857600 Nov 12 18:30 backup.2.dar
-rw-r--r-- 1 root root 143 Nov 12 18:30 backup.2.dar.sha512
-rw-r--r-- 1 root root 104857600 Nov 12 18:30 backup.3.dar
-rw-r--r-- 1 root root 143 Nov 12 18:30 backup.3.dar.sha512
-rw-r--r-- 1 root root 63577207 Nov 12 18:30 backup.4.dar
-rw-r--r-- 1 root root 143 Nov 12 18:30 backup.4.dar.sha512
terre:/mnt/memdisk# dar -t backup -E 'sha512sum -c %p/%b.%N.%e.sha512'
backup.4.dar: OK
backup.1.dar: OK
backup.2.dar: OK
backup.3.dar: OK
backup.4.dar: OK
--------------------------------------------
2594 item(s) treated
0 item(s) with error
0 item(s) ignored (excluded by filters)
--------------------------------------------
Total number of items considered: 2594
--------------------------------------------
terre:/mnt/memdisk#
In this example, we used slicing with on-the-fly hashing, which generated for each slice
the corresponding sha512 hash file. Then we tested the archive content and at the same time the hash
files thanks to the -E
option. Of course any user command or shell or python script,
can be used instead, and for backup, restoration, testing, snapshotting, ...
not supported by rsync
tar has the -F option
to launch a command after each tape,
but it is only available with multi-volume tar archive, which in turn
cannot be used with compression. Thus we won't test it, as it is quite
restrictive and does not match any common use cases.
terre:/mnt/memdisk/A# ls -l
total 0
terre:/mnt/memdisk/A# dar -c backup -R / -g usr/bin --dry-run
--------------------------------------------
2594 inode(s) saved
including 5 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
34 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 2628
--------------------------------------------
EA saved for 3 inode(s)
FSA saved for 2154 inode(s)
--------------------------------------------
terre:/mnt/memdisk/A# ls -l
total 0
terre:/mnt/memdisk/A#
terre:/mnt/memdisk# rsync -arHAX --dry-run /usr/bin DST
terre:/mnt/memdisk# ls -l DST
ls: cannot access 'DST': No such file or directory
terre:/mnt/memdisk#
does not seem supported by tar
terre:/mnt/memdisk# dar -c backup --user-comment "passphrase is the usual one. Archive was made on %d on host %h" -R / -g usr/bin -K camellia: -zxz -s 100M
Archive backup requires a password:
Please confirm your password:
--------------------------------------------
2594 inode(s) saved
including 5 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
34 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 2628
--------------------------------------------
EA saved for 3 inode(s)
FSA saved for 2154 inode(s)
--------------------------------------------
terre:/mnt/memdisk# dar -l backup -aheader
Archive version format : 11
Compression algorithm used : xz
Compression block size used : 0
Symmetric key encryption used : camellia 256
Asymmetric key encryption used : none
Archive is signed : no
Sequential reading marks : present
User comment : passphrase is the usual one. Archive was made on Thu Nov 12 18:57:35 2020 on host terre
KDF iteration count : 10000
KDF hash algorithm : argon2
Salt size : 32 bytes
Final memory cleanup...
FATAL error, aborting operation: header only mode asked
terre:/mnt/memdisk#
The use of the -aheader
lets one see the archive header, which is always in clear text. The usual listing
operation provides some additional information from the ciphered table of contents and thus in that context requires the passphrase:
terre:/mnt/memdisk# dar -l backup -q
Archive backup requires a password:
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
Archive version format : 11
Compression algorithm used : xz
Compression block size used : 0
Symmetric key encryption used : camellia 256
Asymmetric key encryption used : none
Archive is signed : no
Sequential reading marks : present
User comment : passphrase is the usual one. Archive was made on Thu Nov 12 18:57:35 2020 on host terre
KDF iteration count : 10000
KDF hash algorithm : argon2
Salt size : 32 bytes
Catalogue size in archive : 78268 bytes
Archive is composed of 2 file(s)
File size : 104857600 bytes
Last file size : 17168696 bytes
Archive total size is : 122026296 bytes
The global data compression ratio is: 72%
CATALOGUE CONTENTS :
total number of inode : 2589
fully saved : 2589
binay delta patch : 0
inode metadata only : 0
distribution of inode(s)
- directories : 2
- plain files : 2152
- symbolic links : 435
- named pipes : 0
- unix sockets : 0
- character devices : 0
- block devices : 0
- Door entries : 0
hard links information
- number of inode with hard link : 5
- number of reference to hard linked inodes: 10
destroyed entries information
0 file(s) have been record as destroyed since backup of reference
terre:/mnt/memdisk#
not supported by rsync
not supported by tar
terre:/mnt/memdisk# dar -c backup -R / -g usr/bin -zlz4
--------------------------------------------
2594 inode(s) saved
including 5 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
34 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 2628
--------------------------------------------
EA saved for 3 inode(s)
FSA saved for 2154 inode(s)
--------------------------------------------
terre:/mnt/memdisk# dar -t backup
--------------------------------------------
2594 item(s) treated
0 item(s) with error
0 item(s) ignored (excluded by filters)
--------------------------------------------
Total number of items considered: 2594
--------------------------------------------
terre:/mnt/memdisk#
It does not seem possible to let rsync check that the target or destination
directory is sane and usable. All operations modify the destination files or save
modified files in either the destination directory (the backup) or an alternate directory
(--compare-dest
option).
terre:/mnt/memdisk# rm -rf backup.tar.gz
terre:/mnt/memdisk# tar -czf backup.tar.gz /usr/bin
tar: Removing leading `/' from member names
tar: Removing leading `/' from hard link targets
terre:/mnt/memdisk# tar -tzf backup.tar.gz
usr/bin/
usr/bin/bitmap
usr/bin/dot
usr/bin/indi_usbdewpoint
usr/bin/ruby2.5
usr/bin/pod2man
usr/bin/iptables-xml
usr/bin/knotify4
usr/bin/fakeroot
usr/bin/xclock
[...]
/bin/traceproto
usr/bin/ofm2opl
usr/bin/akonadi_archivemail_agent
usr/bin/resizecons
usr/bin/rletopnm
usr/bin/dh_install
usr/bin/updvitomp
usr/bin/h2xs
usr/bin/xmessage
terre:/mnt/memdisk# echo $?
0
terre:/mnt/memdisk#
terre:/mnt/memdisk# dar -c backup -R SRC -q
terre:/mnt/memdisk# dar -d backup -R SRC
--------------------------------------------
2594 item(s) treated
0 item(s) do not match those on filesystem
0 item(s) ignored (excluded by filters)
--------------------------------------------
Total number of items considered: 2594
--------------------------------------------
terre:/mnt/memdisk# echo $?
0
terre:/mnt/memdisk#
Does not seem supported by rsync
terre:/mnt/memdisk/SRC# tar -czf ../backup.tar.gz .
terre:/mnt/memdisk/SRC# tar -dzf ../backup.tar.gz
terre:/mnt/memdisk/SRC# echo $?
0
terre:/mnt/memdisk/SRC#
terre:/mnt/memdisk# dar -c backup -R / -g usr/bin -q
terre:/mnt/memdisk# rm backup.1.dar
terre:/mnt/memdisk# dar -c backup -R / -g usr/bin
--------------------------------------------
2594 inode(s) saved
including 5 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
34 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 2628
--------------------------------------------
EA saved for 3 inode(s)
FSA saved for 2154 inode(s)
--------------------------------------------
terre:/mnt/memdisk# dar -c backup -R / -g usr/bin -vm
Arguments read from /usr/local/etc/darrc :
Creating low layer: Writing archive into a plain file object...
Adding a new layer on top: Caching layer for better performances...
Writing down the archive header...
Adding a new layer on top: Escape layer to allow sequential reading...
All layers have been created successfully
Building the catalog object...
Processing files for backup...
Writing down archive contents...
Closing the escape layer...
Writing down the first archive terminator...
Writing down archive trailer...
Writing down the second archive terminator...
Closing archive low layer...
Archive is closed.
--------------------------------------------
2594 inode(s) saved
including 5 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
34 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 2628
--------------------------------------------
EA saved for 3 inode(s)
FSA saved for 2154 inode(s)
--------------------------------------------
Making room in memory (releasing memory used by archive of reference)...
Final memory cleanup...
terre:/mnt/memdisk# rm -f backup*
terre:/mnt/memdisk# dar -c backup -R / -g usr/bin -vt -q
Adding folder to archive: /usr
Saving Filesystem Specific Attributes for /usr
Adding folder to archive: /usr/bin
Saving Filesystem Specific Attributes for /usr/bin
Adding file to archive: /usr/bin/bitmap
Saving Filesystem Specific Attributes for /usr/bin/bitmap
[...]
Saving Filesystem Specific Attributes for /usr/bin/dh_install
Adding symlink to archive: /usr/bin/updvitomp
Adding file to archive: /usr/bin/h2xs
Saving Filesystem Specific Attributes for /usr/bin/h2xs
Adding file to archive: /usr/bin/xmessage
Saving Filesystem Specific Attributes for /usr/bin/xmessage
terre:/mnt/memdisk# rm -f backup*
terre:/mnt/memdisk# dar -c backup -R / -g usr/bin -vd
Inspecting directory /root
Inspecting directory /bin
Inspecting directory /sbin
Inspecting directory /tmp
Inspecting directory /sys
Inspecting directory /lib
[...]
Inspecting directory /var
Inspecting directory /proc
Inspecting directory /dev
Inspecting directory /etc
Inspecting directory /media
Inspecting directory /run
terre:/mnt/memdisk#
terre:/mnt/memdisk# rm -f backup.*
terre:/mnt/memdisk# dar -c backup -R / -g usr/bin -vf -q
Finished Inspecting directory /usr/bin , saved 408 Mio, compression ratio 13%
Finished Inspecting directory /usr , saved 408 Mio, compression ratio 13%
terre:/mnt/memdisk#
terre:/mnt/memdisk# dar -c backup -R / -g usr/bin -vmasks -q
directory tree filter:
AND
| OR
| | Is subdir of: /usr/bin [case sensitive]
| +--
+--
filename filter:
AND
| TRUE
+--
EA filter:
AND
| TRUE
+--
Compression filter:
TRUE
terre:/mnt/memdisk#
dar has several options to define which type of message to show or not to show:
-v, -vs, -vt, -vd, -vf, -vm, -vmasks, -q
. They can be combined.
terre:/mnt/memdisk# rsync -arHAX /usr/bin DST
terre:/mnt/memdisk# rm -rf DST
terre:/mnt/memdisk# rsync -arHAX -v /usr/bin DST
sending incremental file list
created directory DST
bin/
bin/2to3-2.7
bin/411toppm
bin/7z
bin/7za
bin/7zr
bin/FvwmCommand
[...]
bin/zstdmt -> zstd
bin/perl => bin/perl5.28.1
bin/perlbug => bin/perlthanks
bin/python3.7 => bin/python3.7m
bin/pkg-config => bin/x86_64-pc-linux-gnu-pkg-config
bin/unzip => bin/zipinfo
sent 437,298,617 bytes received 42,381 bytes 174,936,399.20 bytes/sec
total size is 445,394,557 speedup is 1.02
root@terre:/mnt/memdisk# rsync -arHAX --info=progress2 /usr/bin DST
437,083,500 98% 128.42MB/s 0:00:03 (xfr#2152, to-chk=0/2593)
root@terre:/mnt/memdisk#
-v option
leads to a more verbose output, while -q
removes the non-error messages. Using both at the same time seems not to
be different than using -q
alone. However rsync has a very rich
set of additional options like --info
, --debug
that can be added on top.
terre:/mnt/memdisk# tar -czf backup.tar.gz /usr/bin
tar: Removing leading `/' from member names
tar: Removing leading `/' from hard link targets
terre:/mnt/memdisk# rm backup.tar.gz
terre:/mnt/memdisk# tar -v -czf backup.tar.gz /usr/bin
tar: Removing leading `/' from member names
/usr/bin/
/usr/bin/bitmap
/usr/bin/dot
/usr/bin/indi_usbdewpoint
/usr/bin/ruby2.5
/usr/bin/pod2man
/usr/bin/iptables-xml
/usr/bin/knotify4
[...]
/usr/bin/traceproto
/usr/bin/ofm2opl
/usr/bin/akonadi_archivemail_agent
/usr/bin/resizecons
/usr/bin/rletopnm
/usr/bin/dh_install
/usr/bin/updvitomp
/usr/bin/h2xs
/usr/bin/xmessage
terre:/mnt/memdisk#
tar only provides the -v
option to increase
verbosity.
We will perform two types of tests:
terre:/mnt/memdisk# dar -c backup -R / -g usr/bin -z6 -q
terre:/mnt/memdisk# dar -l backup -g usr/bin/emacs-gtk
[Data ][D][ EA ][FSA][Compr][S]| Permission | User | Group | Size | Date | filename
--------------------------------+------------+-------+-------+---------+-------------------------------+------------
[Saved][-] [-L-][ 64%][ ] drwxr-xr-x root root 408 Mio Sun Jun 2 23:25:09 2019 usr
[Saved][-] [-L-][ 64%][ ] drwxr-xr-x root root 408 Mio Sun Nov 8 13:43:58 2020 usr/bin
[Saved][ ] [-L-][ 90%][X] -rwxr-xr-x root root 38 Mio Thu Sep 5 04:35:24 2019 usr/bin/emacs-gtk
terre:/mnt/memdisk# dar -A backup -+ without-emacs -ak -P usr/bin/emacs-gtk -vs -q
Skipping file: <ROOT>/usr/bin/emacs-gtk
terre:/mnt/memdisk# dar -l without-emacs -g usr/bin/emacs-gtk
[Data ][D][ EA ][FSA][Compr][S]| Permission | User | Group | Size | Date | filename
--------------------------------+------------+-------+-------+---------+-------------------------------+------------
[Saved][-] [-L-][ 62%][ ] drwxr-xr-x root root 370 Mio Sun Jun 2 23:25:09 2019 usr
[Saved][-] [-L-][ 62%][ ] drwxr-xr-x root root 370 Mio Sun Nov 8 13:43:58 2020 usr/bin
terre:/mnt/memdisk#rm backup.*
terre:/mnt/memdisk#mv without-emacs.1.dar backup.1.dar
terre:/mnt/memdisk#
dar does not modify an existing backup but creates a copy of it with the requested files or
directories removed. The process can be quick even with compression thanks to the -ak
option, which avoids uncompressing and recompressing
the files that are kept. Before removing the old backup you can test the sanity of the newly generated one.
terre:/mnt/memdisk# dar -c emacs -R / -g usr/bin/emacs-gtk -z6 -q
terre:/mnt/memdisk# dar -l emacs
[Data ][D][ EA ][FSA][Compr][S]| Permission | User | Group | Size | Date | filename
--------------------------------+------------+-------+-------+---------+-------------------------------+------------
[Saved][-] [-L-][ 90%][ ] drwxr-xr-x root root 38 Mio Sun Jun 2 23:25:09 2019 usr
[Saved][-] [-L-][ 90%][ ] drwxr-xr-x root root 38 Mio Sun Nov 8 13:43:58 2020 usr/bin
[Saved][ ] [-L-][ 90%][X] -rwxr-xr-x root root 38 Mio Thu Sep 5 04:35:24 2019 usr/bin/emacs-gtk
terre:/mnt/memdisk# dar -A backup -@ emacs -+ with-emacs -ak
--------------------------------------------
2594 inode(s) added to archive
with 10 hard link(s) recorded
0 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted
--------------------------------------------
EA saved for 3 inode(s)
FSA saved for 2159 inode(s)
--------------------------------------------
Total number of inode(s) considered: 2594
--------------------------------------------
terre:/mnt/memdisk# dar -l with-emacs -g usr/bin/emacs-gtk
[Data ][D][ EA ][FSA][Compr][S]| Permission | User | Group | Size | Date | filename
--------------------------------+------------+-------+-------+---------+-------------------------------+------------
[Saved][-] [-L-][ 64%][ ] drwxr-xr-x root root 408 Mio Sun Jun 2 23:25:09 2019 usr
[Saved][-] [-L-][ 64%][ ] drwxr-xr-x root root 408 Mio Sun Nov 8 13:43:58 2020 usr/bin
[Saved][ ] [-L-][ 90%][X] -rwxr-xr-x root root 38 Mio Thu Sep 5 04:35:24 2019 usr/bin/emacs-gtk
terre:/mnt/memdisk# rm emacs.* backup.*
terre:/mnt/memdisk# mv with-emacs.1.dar backup.1.dar
terre:/mnt/memdisk#
Here, to add files to an existing backup, we must make a small backup of these files only, then merge this backup with the backup we want to modify. None of the source data is touched in this operation: if something goes wrong or if you made an error, you can fix it and restart without taking the risk of losing data.
The backup made by rsync is just a copy of the saved files, removing a file from the backup
is as simple as calling rm
on that file in the repository that is considered the backup.
While adding a new file in the backup can be done by using rsync as usual including the directory tree where this file resides.
terre:/mnt/memdisk# tar -czf backup.tar.gz /usr/bin
tar: Removing leading `/' from member names
tar: Removing leading `/' from hard link targets
terre:/mnt/memdisk# tar -tvf backup.tar.gz | grep emacs-gtk
-rwxr-xr-x root/root 39926024 2019-09-05 04:35 usr/bin/emacs-gtk
terre:/mnt/memdisk# tar -tvf backup.tar.gz | grep emacs-gtk
-rwxr-xr-x root/root 39926024 2019-09-05 04:35 usr/bin/emacs-gtk
terre:/mnt/memdisk# tar --delete usr/bin/emacs-gtk -f backup.tar.gz
tar: Cannot update compressed archives
tar: Error is not recoverable: exiting now
terre:/mnt/memdisk#
Well, tar cannot manipulate compressed archives. But what is the point of removing a file from a backup if storage space is not an issue? And if storage space is an issue, would compression not be used?
terre:/mnt/memdisk# dar -c - -z6 -R SRC > backup.file
--------------------------------------------
2594 inode(s) saved
including 5 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
0 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 2594
--------------------------------------------
EA saved for 3 inode(s)
FSA saved for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk# rm -rf DST
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# dar -x - --sequential-read -R DST < backup.file
--------------------------------------------
2594 inode(s) restored
including 5 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
0 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 2594
--------------------------------------------
EA restored for 3 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk#
dar can read a backup from stdin and write a backup to stdout.
Using stdin/stdout to send or read backed up data does not seem possible with rsync.
terre:/mnt/memdisk# tar -czf - SRC > backup.file
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# cd DST
terre:/mnt/memdisk/DST# tar -xzf - < ../backup.file
terre:/mnt/memdisk/DST#
tar can read a backup from stdin and write a backup to stdout.
For Remote Network storage, if you use a personal NAS you may avoid generating ciphered backups, though you should still transfer them using a secured protocol if the underlying network is not your own from end to end (for example, if part of the path goes over the Internet without IPSec or equivalent).
Why ciphering backup if using secure transfer protocol?
In the following we will use both a secure protocol and a ciphered backup, without using local storage. We will also need compression to save precious space (usually you pay for the cloud storage you use) and maybe slicing, depending on the constraints imposed by the remote storage (some providers ask you to pay an extra amount to store larger files; slicing lets you avoid this extra cost in such a context). Another use case for slicing is when the file transfer protocol is not able to resume an interrupted transfer: you will then only need to restart it for the last slice, not for the whole backup.
terre:/mnt/memdisk# sftp denis@dar
The authenticity of host 'dar (192.168.6.32)' can't be established.
RSA key fingerprint is SHA256:KN3o/psWC512grcZ5/J5dTSg9PzIXbZAHiig/hqfkc8.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'dar,192.168.6.32' (RSA) to the list of known hosts.
denis@dar's password:
Connected to denis@dar.
sftp> bye
terre:/mnt/memdisk#
terre:/mnt/memdisk# dar -c sftp://denis@dar/home/denis/backup -R / -g etc -K aes: -zlz4 -s 1M
Please provide the password for login denis at host dar:
Archive backup requires a password:
Please confirm your password:
--------------------------------------------
2360 inode(s) saved
including 0 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
27 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 2387
--------------------------------------------
EA saved for 0 inode(s)
FSA saved for 1523 inode(s)
--------------------------------------------
terre:/mnt/memdisk# sftp denis@dar
denis@dar's password:
Connected to denis@dar.
sftp> ls -l
-rw-r--r-- 1 denis denis 1048576 Nov 26 17:15 backup.1.dar
-rw-r--r-- 1 denis denis 1048576 Nov 26 17:15 backup.2.dar
-rw-r--r-- 1 denis denis 1048576 Nov 26 17:15 backup.3.dar
-rw-r--r-- 1 denis denis 474982 Nov 26 17:15 backup.4.dar
sftp> bye
terre:/mnt/memdisk#
The backup results in four ciphered slices located on the remote sftp server. Let's add
a -E option
to see which slices are being read while testing the archive.
terre:/mnt/memdisk# dar -t sftp://denis@dar/home/denis/backup -E "echo 'openning slice %p/%b.%N.%e'"
Please provide the password for login denis at host dar:
openning slice /home/denis/backup.4.dar
Archive backup requires a password:
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
openning slice /home/denis/backup.1.dar
openning slice /home/denis/backup.2.dar
openning slice /home/denis/backup.3.dar
openning slice /home/denis/backup.4.dar
--------------------------------------------
2360 item(s) treated
0 item(s) with error
0 item(s) ignored (excluded by filters)
--------------------------------------------
Total number of items considered: 2360
--------------------------------------------
terre:/mnt/memdisk#
We see that all slices have been read as expected. Now let's restore /etc/fstab in the current directory and compare the restored file with the real /etc/fstab.
terre:/mnt/memdisk# dar -x sftp://denis@dar/home/denis/backup -E "echo 'openning slice %p/%b.%N.%e'" -g etc/fstab --flat
Please provide the password for login denis at host dar:
openning slice /home/denis/backup.4.dar
Archive backup requires a password:
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
1 inode(s) restored
including 0 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
269 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 270
--------------------------------------------
EA restored for 0 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk# diff fstab /etc/fstab
terre:/mnt/memdisk# echo $?
0
terre:/mnt/memdisk#
As seen above, only one slice (slice #4) has been necessary to restore /etc/fstab. But let's save two files, a huge one and a small one, into a single-sliced backup and measure the transfer time of the backup and restoration of this ciphered and compressed backup through sftp. We have added public key authentication for precise time measurement:
terre:/mnt/memdisk# sftp denis@dar
Connected to denis@dar.
sftp> bye
terre:/mnt/memdisk# ls -l SRC
total 315396
-rw------- 1 root root 322961408 Nov 26 17:28 devuan_beowulf_3.0.0_amd64-netinstall.iso
-rw-r--r-- 1 root root 994 Nov 26 17:29 fstab
terre:/mnt/memdisk#
terre:/mnt/memdisk# time dar -c sftp://denis@dar/home/denis/backup -R SRC -z6 -K aes:hello -afile-auth -q
20.769u 2.445s 0:22.77 101.8% 0+0k 0+0io 0pf+0w
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# time dar -x sftp://denis@dar/home/denis/backup -R DST -K hello -afile-auth -q
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
11.826u 4.211s 0:15.88 100.9% 0+0k 0+0io 0pf+0w
terre:/mnt/memdisk# diff -rs SRC DST
Files SRC/devuan_beowulf_3.0.0_amd64-netinstall.iso and DST/devuan_beowulf_3.0.0_amd64-netinstall.iso are identical
Files SRC/fstab and DST/fstab are identical
terre:/mnt/memdisk# rm DST/fstab
terre:/mnt/memdisk# time dar -x sftp://denis@dar/home/denis/backup -R DST -K hello -afile-auth -q -g fstab
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
0.680u 0.012s 0:00.87 79.3% 0+0k 0+0io 0pf+0w
terre:/mnt/memdisk#
While restoring the whole backup needs 15 seconds of transfer time, restoring fstab
alone requires only 0.87 second,
as there is only one slice, this shows that dar is reading only the necessary part of the archive even within a slice to
perform the operation.
terre:/mnt/memdisk# rsync -arHAXSzq /etc denis@dar:/home/denis
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# rsync -arHAXSzq denis@dar:/home/denis/etc/fstab .
terre:/mnt/memdisk# diff fstab /etc/fstab
terre:/mnt/memdisk# echo $?
0
terre:/mnt/memdisk#
We can back up to a remote sftp server and we can compress the data on the fly, but it is stored neither compressed nor ciphered. Restoration is possible per file or for the whole backup.
tar has no way to use sftp or any other secured transfer protocol, nor to perform encryption by itself. When the time comes to restore a particular file, the whole backup has to be retrieved, unciphered and uncompressed to restore even just a single file.
The objective of this test is to measure the way the backup tools under test behave when the backup has been corrupted. We will here just flip one bit of the backup, at the beginning, in the middle or at the end of the backup, and observe the consequences in terms of the ability to restore the backup.
terre:/mnt/memdisk# dar -c backup -R SRC -z6
--------------------------------------------
74725 inode(s) saved
including 0 hard link(s) treated
0 inode(s) changed at the moment of the backup and could not be saved properly
0 byte(s) have been wasted in the archive to resave changing files
0 inode(s) with only metadata changed
0 inode(s) not saved (no inode/file change)
0 inode(s) failed to be saved (filesystem error)
0 inode(s) ignored (excluded by filters)
0 inode(s) recorded as deleted from reference backup
--------------------------------------------
Total number of inode(s) considered: 74725
--------------------------------------------
EA saved for 0 inode(s)
FSA saved for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk# ls -al backup*
-rw-r--r-- 1 root root 219088536 Nov 17 17:45 backup.1.dar
terre:/mnt/memdisk# ./hide_change backup.1.dar 1
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# dar -x backup -R DST
backup.1.dar is not a valid file (wrong magic number), please provide the good file. [return = YES | Esc = NO]
Escaping...
Final memory cleanup...
Aborting program. User refused to continue while asking: backup.1.dar is not a valid file (wrong magic number), please provide the good file.
terre:/mnt/memdisk# dar -x backup -R DST -alax
LAX MODE: In spite of its name, backup.1.dar does not appear to be a dar slice, assuming a data corruption took place and continuing
LAX MODE: Archive is flagged as having escape sequence (which is normal in recent archive versions). However if this is not expected, shall I assume a data corruption occurred in this field and that this flag should be ignored? (If unsure, refuse) [return = YES | Esc = NO]
Escaping...
--------------------------------------------
74725 inode(s) restored
including 0 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
0 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 74725
--------------------------------------------
EA restored for 0 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk# diff -r SRC DST
terre:/mnt/memdisk# echo $?
0
terre:/mnt/memdisk#
When modifying the first bit, dar detected the corruption. We can use the lax mode (-alax
option) to bypass this
corruption and then the restoration proceeds normally. We can try a bit further, for example somewhere in the middle of the archive,
thus at offset 876354144 (half of the size expressed in bits, not bytes):
terre:/mnt/memdisk# ls -l backup*
-rw-r--r-- 1 root root 219088536 Nov 17 17:45 backup.1.dar
terre:/mnt/memdisk# hide_change backup.1.dar 876354144
terre:/mnt/memdisk# rm -rf DST
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# dar -x backup -R DST
Error while restoring /mnt/memdisk/DST/linux-5.9.8/drivers/mtd/spi-nor/core.c : compressed data CRC error
--------------------------------------------
74724 inode(s) restored
including 0 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
0 inode(s) ignored (excluded by filters)
1 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 74725
--------------------------------------------
EA restored for 0 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
Final memory cleanup...
All files asked could not be restored
terre:/mnt/memdisk# diff -rq SRC DST
Files SRC/linux-5.9.8/drivers/mtd/spi-nor/core.c and DST/linux-5.9.8/drivers/mtd/spi-nor/core.c differ
terre:/mnt/memdisk#
One file could not be restored properly as reported by dar, but all other files could be and are identical to their respective originals. Let's modify the last bit for completeness:
terre:/mnt/memdisk# ls -al *.dar
-rw-r--r-- 1 root root 219088537 Nov 17 17:45 backup.1.dar
terre:/mnt/memdisk# cp backup.1.dar backop.1.dar
terre:/mnt/memdisk# hide_change backup.1.dar 1752708287
terre:/mnt/memdisk# ls -al *.dar
-rw-r--r-- 1 root root 219088536 Nov 17 17:58 backop.1.dar
-rw-r--r-- 1 root root 219088536 Nov 17 17:58 backup.1.dar
terre:/mnt/memdisk# diff backup.1.dar backop.1.dar
Binary files backup.1.dar and backop.1.dar differ
terre:/mnt/memdisk# rm -rf DST
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# dar -x backup -R DST
--------------------------------------------
74725 inode(s) restored
including 0 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
0 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 74725
--------------------------------------------
EA restored for 0 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
terre:/mnt/memdisk#
terre:/mnt/memdisk# diff -rq SRC DST
terre:/mnt/memdisk# echo $?
0
terre:/mnt/memdisk#
By chance it did not affect the ability to restore the backup. However if it ever had,
we have several fallbacks: the --sequential-read
mode, the use of an already created
snapshot (aka isolated catalogue), as seen with the snapshot feature, to back up the internal
table of contents, or as a last resort
the -alax option
possibly combined with the --sequential-read
mode and a backup snapshot.
terre:/mnt/memdisk# rm -rf DST
terre:/mnt/memdisk# rsync -arHAXS SRC/* DST
terre:/mnt/memdisk# ls -al DST
total 0
drwxr-xr-x 3 root root 60 Nov 17 18:07 .
drwxrwxrwt 4 root root 140 Nov 17 18:07 ..
drwxrwxr-x 24 root root 740 Nov 10 21:16 linux-5.9.8
terre:/mnt/memdisk# diff -rq SRC DST
terre:/mnt/memdisk# ./hide_change DST/linux-5.9.8/README 10
terre:/mnt/memdisk# diff -rq SRC DST
Files SRC/linux-5.9.8/README and DST/linux-5.9.8/README differ
terre:/mnt/memdisk# rsync -arvHAXS SRC/* DST
sending incremental file list
sent 1,254,054 bytes received 5,215 bytes 839,512.67 bytes/sec
total size is 954,980,692 speedup is 758.36
terre:/mnt/memdisk# diff -rq SRC DST
Files SRC/linux-5.9.8/README and DST/linux-5.9.8/README differ
terre:/mnt/memdisk#
After modifying the backup (the directory we sync with), rsync does not report any difference and the backup stays corrupted.
terre:/mnt/memdisk# rm -rf DST
terre:/mnt/memdisk# tar -czf backup.tar.gz SRC
terre:/mnt/memdisk# ls -l backup*
-rw-r--r-- 1 root root 183659664 Nov 17 18:11 backup.tar.gz
terre:/mnt/memdisk# ./hide_change backup.tar.gz 1
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# cd DST
terre:/mnt/memdisk/DST# tar -xzf ../backup.tar.gz
gzip: stdin: not in gzip format
tar: Child returned status 1
tar: Error is not recoverable: exiting now
terre:/mnt/memdisk/DST#
terre:/mnt/memdisk/DST# find . -ls
1720964 0 drwxr-xr-x 2 root root 40 Nov 17 18:56 .
terre:/mnt/memdisk/DST#
Modifying the first byte leads to a completely unusable backup. Nothing got restored at all. Let's see what happens when modifying a single bit in the middle of the backup:
terre:/mnt/memdisk# ls -l backup*
-rw-r--r-- 1 root root 183659664 Nov 17 18:11 backup.tar.gz
terre:/mnt/memdisk# ./hide_change backup.tar.gz 734638656
terre:/mnt/memdisk# cd DST
terre:/mnt/memdisk/DST# tar -xf ../backup.tar.gz
tar: Skipping to next header
gzip: stdin: invalid compressed data--crc error
gzip: stdin: invalid compressed data--length error
tar: Child returned status 1
tar: Error is not recoverable: exiting now
terre:/mnt/memdisk/DST#
terre:/mnt/memdisk/DST# diff -rq ../SRC SRC | wc -l
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/arc: No such file or directory
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/arm: No such file or directory
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/arm64: No such file or directory
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/c6x: No such file or directory
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/h8300: No such file or directory
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/microblaze: No such file or directory
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/mips: No such file or directory
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/nios2: No such file or directory
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/openrisc: No such file or directory
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/powerpc: No such file or directory
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/sh: No such file or directory
diff: SRC/linux-5.9.8/scripts/dtc/include-prefixes/xtensa: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/copyloops/copyuser_64.S: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/copyloops/memcpy_64.S: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/primitives/asm/asm-const.h: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/primitives/word-at-a-time.h: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/stringloops/memcmp_32.S: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/stringloops/memcmp_64.S: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/stringloops/strlen_32.S: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/vphn/asm/lppaca.h: No such file or directory
diff: SRC/linux-5.9.8/tools/testing/selftests/powerpc/vphn/vphn.c: No such file or directory
150
terre:/mnt/memdisk/DST# find ../SRC | wc -l
74726
terre:/mnt/memdisk/DST# find SRC | wc -l
32615
terre:/mnt/memdisk/DST#
Only 32615 files out of the 74726 that were saved could be restored. Assuming the problem is due to the fact that the backup is compressed, let's see tar without compression:
terre:/mnt/memdisk# tar -cf backup.tar SRC
terre:/mnt/memdisk# ls -l backup*
-rw-r--r-- 1 root root 1011312640 Nov 17 19:28 backup.tar
terre:/mnt/memdisk# ./hide_change backup.tar 1
terre:/mnt/memdisk# rm -rf DST
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# cd DST
terre:/mnt/memdisk/DST# tar -xf ../backup.tar
tar: This does not look like a tar archive
tar: Skipping to next header
tar: Exiting with failure status due to previous errors
terre:/mnt/memdisk/DST# diff -rq ../SRC SRC
terre:/mnt/memdisk/DST# echo $?
0
terre:/mnt/memdisk/DST#
Without compression, a tar backup is much more reliable, however we now need more than 5 times the storage space to hold the backup. Let's see what happens when we modify a single bit in the middle of the backup:
terre:/mnt/memdisk# ./hide_change backup.tar 4045250560
terre:/mnt/memdisk# rm -rf DST
terre:/mnt/memdisk# mkdir DST
terre:/mnt/memdisk# cd DST
terre:/mnt/memdisk/DST# tar -xf ../backup.tar
terre:/mnt/memdisk/DST#
terre:/mnt/memdisk/DST# diff -rq ../SRC SRC
Files ../SRC/linux-5.9.8/drivers/media/pci/bt8xx/bttv-cards.c and SRC/linux-5.9.8/drivers/media/pci/bt8xx/bttv-cards.c differ
terre:/mnt/memdisk/DST#
The backup restoration succeeded according to tar but the corruption has been completely ignored!!! The result is both a corrupted backup and corrupted restored data, with no notification at all...
We can increase the robustness of any file or set of files by means of the Parchive software. While its use is suited to tar and dar, it is not suited to rsync due to the directory tree structure rsync uses for its backup. We will thus here measure the par2create (Parchive) execution time compared to the backup time of tar and dar.
devuan:/mnt/memdisk# mkdir SRC
devuan:/mnt/memdisk# cp --preserve -r /usr SRC
devuan:/mnt/memdisk# time tar -czf backup.tar.gz SRC
62.550u 3.148s 1:01.75 106.3% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# time dar -c backup -z6 -1 0 -at -R SRC -q
60.287u 1.152s 1:01.45 99.9% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# ls -l backup.*
-rw-r--r-- 1 root root 601976990 Dec 1 10:48 backup.1.dar
-rw-r--r-- 1 root root 588260243 Dec 1 10:47 backup.tar.gz
devuan:/mnt/memdisk# time par2create -r5 -n1 -q backup.tar.gz
Opening: backup.tar.gz
Done
94.465u 0.535s 0:05.74 1654.8% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# time par2create -r5 -n1 -q backup.1.dar
Opening: backup.1.dar
Done
110.048u 0.364s 0:06.19 1783.5% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk#
We see that the redundancy process is not negligible: for 5% of data redundancy, we need around 10% of extra execution time (and noticeably more CPU time, as par2create spreads the work over many cores). Things get worse when the size of the data to process by Parchive increases and when the disk I/O comes into play (here /mnt/memdisk was an in-memory tmpfs filesystem). This is the case when the amount of data to back up is larger than the available RAM space, which is a quite frequent situation:
devuan:~/tmp# du -sh SRC
27G SRC
devuan:~/tmp# free -m
. total used free shared buff/cache available
Mem: 15776 560 433 1 14782 14823
Swap: 0 0 0
devuan:~/tmp#
We are now placed in this context, having 27 GiB of data to back up on a machine having only around 16 GiB of RAM.
devuan:~/tmp# time dar -c backup -z6 -at -1 0 -R SRC -q -E 'par2create -r5 -n1 -q %b.%N.%e'
Opening: backup.1.dar
Done
15237.777u 105.379s 36:48.53 694.7% 0+0k 158854544+57572120io 158pf+0w
devuan:~/tmp#
This execution time of around 36 mn above can be improved by using multiple slices. Choosing a slice size smaller than the available RAM lets Parchive compute parity data right after each slice has been generated, while it is still in the disk cache (RAM), avoiding the corresponding disk I/O we previously had when the backup was read back a second time:
devuan:~/tmp# time dar -c backup_splitted -z6 -at -1 0 -R SRC -q -E 'par2create -r5 -n1 -q %b.%N.%e' -s 1G
Opening: backup_splitted.1.dar
Done
Opening: backup_splitted.2.dar
Done
[...]
Opening: backup_splitted.26.dar
Done
Opening: backup_splitted.27.dar
Done
6040.106u 49.201s 21:58.73 461.7% 0+0k 61862640+57567104io 123pf+0w
devuan:~/tmp#
The total execution time dropped to around 22 mn! Which makes a 40% time reduction. By the way, having here 27 files of 1 GiB is also easier to manipulate (file transfer, copy to removable media,...) than a huge equivalent file of 27 GiB.
devuan:~/tmp# time tar -czf backup.tar.gz SRC
837.662u 80.776s 18:26.08 83.0% 0+0k 54926128+54799696io 6pf+0w
devuan:~/tmp# time par2create -r5 -n1 -q backup.tar.gz
Opening: backup.tar.gz
Done
13352.393u 71.390s 18:24.34 1215.5% 0+0k 95000064+2772144io 9pf+0w
devuan:~/tmp#
We get the same execution time (18 mn) for both tar and par2, thus a total of around 36 mn. This identical time for both programs, while the real CPU usage is much higher for par2, clearly shows that the slowest operation was the disk I/O. Otherwise the overall time of the operation is similar to what we saw with dar above, except that we cannot use multi-volume to speed up the operation as we did with dar: tar is not able to compress *and* produce multi-volume backups: what we would gain on one side would be lost on the other side...
In order to compare performance in a fair manner, we have to take into consideration that some CPU intensive features are not implemented by all software or have different default values:
-z6 option
-S option
)
and disable it for dar
(activated by default): -1 0 option
-at option
All performance aspects are not interesting in all use cases. We can distinguish two main types of use cases:
When using rsync as a backup tool (as opposed to a copy operation), we assume the remote (or local) copy is the backup, and thus restoring implies syncing back this remote (or local) copy to the place where the original data was located and has been lost.
To prepare the data under test we used:
All data to back up or to copy has been stored in a tmpfs, which is also the destination of the created backups and restored data. The swap has been disabled to avoid any disk I/O penalty, with the intention of providing a fair comparison environment (avoiding variable disk cache performance).
devuan:/mnt/memdisk# free
total used free shared buff/cache available
Mem: 16155172 691764 151608 8780152 15311800 6299316
Swap: 0 0 0
devuan:/mnt/memdisk# df -h .
Filesystem Size Used Avail Use% Mounted on
tmpfs 14G 4.0K 14G 1% /mnt/memdisk
devuan:/mnt/memdisk#
To prepare the Linux system under backup, we installed the Devuan system a few days before in a VM. On day D, a full backup was executed, we updated/upgraded the system using the distro package manager and we made a differential backup based on the first one, both backups being written to the testing machine (bare-metal server) that was used for the performance tests:
root@Georges:~# dar -c sftp://denis@10.13.30.163/home/denis/tmp/full -zlz4 -R / -M -D --hash sha1 -afile-auth -C cat_full
root@Georges:~# apt-get update
[...]
root@Georges:~# apt-get upgrade
[...]
root@Georges:~# dar -c sftp://denis@10.13.30.163/home/denis/tmp/diff -A cat_full -zlz4 -R / -M -D --hash sha1 -afile-auth
root@Georges:~#
Back on the testing host (the bare-metal server at 10.13.30.163) we restored the data for the performance test the following way, excluding FSA and EA to avoid a ton of warnings as those are not supported on tmpfs filesystems:
devuan:/mnt/memdisk# mkdir state-1
devuan:/mnt/memdisk# mkdir state-2
devuan:/mnt/memdisk# dar -x ~denis/tmp/full -R state-1 --fsa-scope none -u "*"
--------------------------------------------
136836 inode(s) restored
including 27 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
0 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 136836
--------------------------------------------
EA restored for 3 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
devuan:/mnt/memdisk# dar -x ~denis/tmp/full -R state-2 --fsa-scope none -u "*"
--------------------------------------------
136836 inode(s) restored
including 27 hard link(s)
0 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
0 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 136836
--------------------------------------------
EA restored for 3 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
devuan:/mnt/memdisk# dar -x ~denis/tmp/diff -R state-2 --fsa-scope none -u "*" -w
--------------------------------------------
568 inode(s) restored
including 0 hard link(s)
136670 inode(s) not restored (not saved in archive)
0 inode(s) not restored (overwriting policy decision)
0 inode(s) ignored (excluded by filters)
0 inode(s) failed to restore (filesystem error)
0 inode(s) deleted
--------------------------------------------
Total number of inode(s) considered: 137238
--------------------------------------------
EA restored for 0 inode(s)
FSA restored for 0 inode(s)
--------------------------------------------
devuan:/mnt/memdisk#
This leads us to have two directories state-1
and state-2
corresponding to the state of the Devuan machine as of two days ago and today respectively.
To perform the copy operation, we have decomposed the operations to precisely measure the execution time. We could have decided to pipe the backup to a second instance of the
backup tool restoring the data (only tar and dar would benefit from this). But the time measurement would have been less easy to obtain and doing it that way does not seem to
provide any noticeable speed improvement. The data used here in SRC1
is a single big ISO file (a Devuan installation DVD image).
devuan:/mnt/memdisk# time dar -c copy -1 0 -at -R SRC1 -q
1.834u 2.909s 0:04.87 97.1% 0+0k 744+0io 12pf+0w
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# time dar -x copy -R DST -q
1.563u 2.836s 0:04.41 99.5% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# du -sh SRC1 DST
4.4G SRC1
4.4G DST
devuan:/mnt/memdisk# diff -r SRC1 DST
devuan:/mnt/memdisk# echo $?
0
devuan:/mnt/memdisk# rm -rf DST copy.1.dar
devuan:/mnt/memdisk# time dar -c copy -1 0 -at -R SRC2 -q
4.739u 3.683s 0:08.44 99.6% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# time dar -x copy -R DST -q
4.449u 3.872s 0:08.34 99.6% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# du -sh SRC2 DST
4.1G SRC2
4.1G DST
devuan:/mnt/memdisk# find DST | wc -l
136837
devuan:/mnt/memdisk#
The overall copy time for dar is:
SRC1
SRC2
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# time rsync -arH SRC1 DST
23.088u 5.420s 0:15.28 186.5% 0+0k 168+0io 4pf+0w
devuan:/mnt/memdisk# diff -r SRC1/ DST/SRC1/
devuan:/mnt/memdisk# echo $?
0
devuan:/mnt/memdisk# rm -rf DST
devuan:/mnt/memdisk# time rsync -arH SRC2 DST
22.408u 8.560s 0:16.59 186.6% 0+0k 1224+0io 6pf+0w
devuan:/mnt/memdisk# du -sh SRC2 DST
4.1G SRC2
4.1G DST
devuan:/mnt/memdisk# find DST | wc -l
136838
devuan:/mnt/memdisk#
The overall copy time for rsync is:
SRC1
SRC2
devuan:/mnt/memdisk# cd SRC1
devuan:/mnt/memdisk/SRC1# time tar -cf ../copy.tar *
0.343u 2.756s 0:03.10 99.6% 0+0k 104+0io 1pf+0w
devuan:/mnt/memdisk/SRC1# cd ../
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# cd DST
devuan:/mnt/memdisk/DST# time tar -xf ../copy.tar
0.339u 3.071s 0:03.41 99.7% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST# cd ..
devuan:/mnt/memdisk# diff -r SRC1/ DST/
devuan:/mnt/memdisk# echo $?
0
devuan:/mnt/memdisk# rm -rf DST copy.tar
devuan:/mnt/memdisk# cd SRC2
devuan:/mnt/memdisk/SRC2# time tar -cf ../copy.tar *
tar: tmp/.ICE-unix/19789: socket ignored
tar: tmp/.X11-unix/X0: socket ignored
0.760u 2.887s 0:03.66 99.4% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/SRC2# cd ..
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# cd DST
devuan:/mnt/memdisk/DST# time tar -xf ../copy.tar
0.814u 3.556s 0:04.38 99.5% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST# cd ..
devuan:/mnt/memdisk# du -sh SRC2 DST
4.1G SRC2
4.1G DST
devuan:/mnt/memdisk# find DST | wc -l
136834
devuan:/mnt/memdisk#
The overall copy time for tar is:
SRC1
SRC2
devuan:/mnt/memdisk# time cp --preserve -r SRC1 DST
0.051u 2.514s 0:02.58 99.2% 0+0k 8+0io 1pf+0w
devuan:/mnt/memdisk# diff -r SRC1 DST
devuan:/mnt/memdisk# echo $?
0
devuan:/mnt/memdisk# rm -rf DST
devuan:/mnt/memdisk# time cp --preserve -r SRC2 DST
0.910u 4.194s 0:05.15 99.0% 0+0k 288+0io 1pf+0w
devuan:/mnt/memdisk# du -sh SRC2 DST
4.1G SRC2
4.2G DST
devuan:/mnt/memdisk# find DST | wc -l
136838
devuan:/mnt/memdisk# find DST/SRC2/tmp/ -ls
2315983 0 drwxrwxrwt 4 root root 100 Dec 3 10:32 DST/SRC2/tmp/
2315987 0 drwxrwxrwt 2 root root 60 Dec 3 10:27 DST/SRC2/tmp/.ICE-unix
2315988 0 srwxrwxrwx 1 denis denis 0 Dec 3 10:27 DST/SRC2/tmp/.ICE-unix/19789
2315985 0 drwxrwxrwt 2 root root 60 Dec 3 10:32 DST/SRC2/tmp/.X11-unix
2315986 0 srwxrwxrwx 1 root root 0 Dec 3 10:32 DST/SRC2/tmp/.X11-unix/X0
2315984 4 -r--r--r-- 1 root root 11 Dec 3 10:32 DST/SRC2/tmp/.X0-lock
devuan:/mnt/memdisk#
The overall copy time for cp is:
SRC1
SRC2
cp is always the fastest and does not reject the Unix sockets the way tar does. However it requires slightly more storage than all other software tested here. And if metadata (ACL, Extended Attributes, filesystem specific attributes, ...) needs to be copied with the data, it does not match the need.
For reference:
devuan:/mnt/memdisk# du -sb state-*
4095931349 state-1
4136318367 state-2
devuan:/mnt/memdisk# find state-1 | wc -l
136837
devuan:/mnt/memdisk# find state-2 | wc -l
137239
devuan:/mnt/memdisk#
devuan:/mnt/memdisk# time dar -c dar-full -R state-1 -at -1 0 -z6 -q
145.970u 3.263s 2:29.73 99.6% 0+0k 12344+0io 71pf+0w
devuan:/mnt/memdisk# time dar -c dar-diff -R state-2 -A dar-full -at -1 0 -z6 -q -asecu
8.957u 0.959s 0:09.93 99.6% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# ls -l *.dar
-rw-r--r-- 1 root root 49498524 Dec 3 16:17 dar-diff.1.dar
-rw-r--r-- 1 root root 1580562224 Dec 3 16:16 dar-full.1.dar
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# time dar -x dar-full -R DST -q
18.677u 4.244s 0:22.94 99.8% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# time dar -x dar-diff -R DST -q -w
2.585u 0.780s 0:03.48 96.5% 0+0k 1856+0io 20pf+0w
devuan:/mnt/memdisk#
devuan:/mnt/memdisk# time dar -x dar-full -R DST -q -w -g etc/fstab
0.934u 0.036s 0:00.98 97.9% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk#
for dar with minimal features (metadata no redundancy -at
, no sparse file consideration -1 0
):
devuan:/mnt/memdisk# rm -rf DST *.dar
devuan:/mnt/memdisk# time dar -c dar-full -R state-1 -at -z6 -q
154.971u 3.000s 2:37.99 99.9% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# time dar -c dar-diff -R state-2 -A dar-full -at -z6 -q -asecu
9.488u 0.871s 0:10.37 99.8% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# ls -l *.dar
-rw-r--r-- 1 root root 49505251 Dec 3 16:27 dar-diff.1.dar
-rw-r--r-- 1 root root 1578428790 Dec 3 16:25 dar-full.1.dar
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# time dar -x dar-full -R DST -q
24.231u 6.110s 0:30.36 99.9% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# time dar -x dar-diff -R DST -q -w
2.677u 0.793s 0:03.48 99.4% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# time dar -x dar-full -R DST -q -w -g etc/fstab
1.067u 0.053s 0:01.13 98.2% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk#
for dar with default features (metadata no redundancy -at
, with sparse file consideration (activated by default))
devuan:/mnt/memdisk# rm -rf DST *.dar
devuan:/mnt/memdisk# time dar -c dar-full -R state-1 -at -z6 --delta sig -q
159.262u 3.332s 2:42.62 99.9% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# time dar -c dar-diff -R state-2 -A dar-full -at -z6 -q -asecu
6.149u 0.950s 0:07.11 99.7% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# ls -l *.dar
-rw-r--r-- 1 root root 23883368 Dec 3 16:39 dar-diff.1.dar
-rw-r--r-- 1 root root 1602481058 Dec 3 16:38 dar-full.1.dar
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# time dar -x dar-full -R DST -q
24.169u 6.163s 0:30.35 99.9% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# time dar -x dar-diff -R DST -q -w
2.481u 0.942s 0:03.44 99.4% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# time dar -x dar-full -R DST -q -w -g etc/fstab
1.205u 0.059s 0:01.27 98.4% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk#
for dar with advanced features (metadata no redundancy -at
, with binary delta computation --delta sig
):
devuan:/mnt/memdisk# mkdir rsync-backup
devuan:/mnt/memdisk# time rsync -arHz --info=stats state-1/* rsync-backup
sent 1,585,540,014 bytes received 2,174,472 bytes 10,080,726.90 bytes/sec
total size is 4,260,538,564 speedup is 2.68
202.640u 8.503s 2:36.98 134.5% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk#
devuan:/mnt/memdisk# time rsync -arHz --info=stats --no-whole-file state-2/* rsync-backup
sent 29,077,377 bytes received 216,581 bytes 3,446,348.00 bytes/sec
total size is 4,300,916,222 speedup is 146.82
7.555u 1.115s 0:07.33 118.1% 0+0k 1784+0io 7pf+0w
devuan:/mnt/memdisk# du -sb rsync-backup
4136318307 rsync-backup
devuan:/mnt/memdisk#
devuan:/mnt/memdisk# rm -rf state-1
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# time rsync -arHz --info=stats --no-whole-file rsync-backup/* DST
sent 1,599,585,756 bytes received 2,181,147 bytes 10,105,784.88 bytes/sec
total size is 4,300,916,222 speedup is 2.69
204.192u 8.306s 2:37.81 134.6% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# time rsync -arHz --info=stats rsync-backup/etc/fstab DST/etc
sent 44 bytes received 12 bytes 112.00 bytes/sec
total size is 664 speedup is 11.86
0.001u 0.002s 0:00.00 0.0% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk#
for rsync with minimal features (no sparse file consideration):
--no-whole-file
see next text)
devuan:/mnt/memdisk# mkdir rsync-backup
devuan:/mnt/memdisk# time rsync -arHSz --info=stats state-1/* rsync-backup
sent 1,585,540,014 bytes received 2,174,460 bytes 8,605,498.50 bytes/sec
total size is 4,260,538,564 speedup is 2.68
232.038u 13.137s 3:03.44 133.6% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# time rsync -arHSz --info=stats --no-whole-file state-2/* rsync-backup
sent 29,077,381 bytes received 216,577 bytes 3,446,348.00 bytes/sec
total size is 4,300,916,222 speedup is 146.82
7.305u 1.275s 0:07.04 121.7% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# du -sb rsync-backup
4136318307 rsync-backup
devuan:/mnt/memdisk# rm -rf state-1
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# time rsync -arHSz --info=stats rsync-backup/* DST
sent 1,599,585,756 bytes received 2,181,219 bytes 10,042,426.18 bytes/sec
total size is 4,300,916,222 speedup is 2.69
205.089u 12.354s 2:38.39 137.2% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk#
devuan:/mnt/memdisk# time rsync -arHSz --info=stats rsync-backup/etc/fstab DST/etc
sent 44 bytes received 12 bytes 112.00 bytes/sec
total size is 664 speedup is 11.86
0.001u 0.002s 0:00.00 0.0% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk#
for rsync with advanced features (sparse file consideration -S option
, binary delta --no-whole-file
)
devuan:/mnt/memdisk# cd state-1
devuan:/mnt/memdisk/state-1# time tar --listed-incremental=../snapshot.file -czf ../tar-full.tar.gz *
tar: tmp/.ICE-unix/19789: socket ignored
tar: tmp/.X11-unix/X0: socket ignored
153.624u 8.676s 2:31.71 106.9% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/state-1# time tar --listed-incremental=../snapshot.file -czf ../tar-diff.tar.gz *
0.809u 0.369s 0:00.98 118.3% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/state-1# cd ..
devuan:/mnt/memdisk# ls -l tar*
-rw-r--r-- 1 root root 765425 Dec 3 16:49 tar-diff.tar.gz
-rw-r--r-- 1 root root 1546464033 Dec 3 16:48 tar-full.tar.gz
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# cd DST
devuan:/mnt/memdisk/DST# time tar -xzf ../tar-full.tar.gz
27.106u 6.756s 0:26.72 126.6% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST# cd ..
devuan:/mnt/memdisk# diff --no-dereference -r state-1 DST
Only in state-1: .cache
Only in state-1/tmp/.ICE-unix: 19789
Only in state-1/tmp/.X11-unix: X0
devuan:/mnt/memdisk# cd DST
devuan:/mnt/memdisk/DST# time tar -xzf ../tar-diff.tar.gz
0.183u 0.085s 0:00.18 144.4% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST#
Doing it that way, the tar differential backup is empty: it only contains empty directories, no file data.
We will apply the changes over state-1
rather than already setup changes at state-2
.
This seems to mean that if the system clock is wrong or was wrong at the time a file was modified
(like daylight saving? or before NTP synchronization at system startup), which is
the same as here, where the changes were made before the full backup was done, those changes will not be
backed up by tar, even though the file's attributes (file size, last modification date,...) changed.
devuan:/mnt/memdisk# cd state-1
devuan:/mnt/memdisk/state-1# time tar --listed-incremental=../snapshot.file -czf ../tar-full.tar.gz *
tar: tmp/.ICE-unix/19789: socket ignored
tar: tmp/.X11-unix/X0: socket ignored
150.751u 8.299s 2:28.59 107.0% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/state-1# cd ..
devuan:/mnt/memdisk# dar -x ~denis/tmp/diff -R state-1 --fsa-scope none -u "*" -w -q
devuan:/mnt/memdisk# cd state-1
devuan:/mnt/memdisk/state-1# time tar --listed-incremental=../snapshot.file -czf ../tar-diff.tar.gz *
6.147u 0.559s 0:06.40 104.5% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/state-1# cd ..
devuan:/mnt/memdisk# ls -l tar* snapshot.file
-rw-r--r-- 1 root root 3350869 Dec 3 17:08 snapshot.file
-rw-r--r-- 1 root root 44607904 Dec 3 17:08 tar-diff.tar.gz
-rw-r--r-- 1 root root 1546448179 Dec 3 17:04 tar-full.tar.gz
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# cd dst
dst: No such file or directory.
devuan:/mnt/memdisk# cd DST
devuan:/mnt/memdisk/DST# time tar -xzf ../tar-full.tar.gz
26.807u 7.020s 0:26.72 126.5% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST# time tar -xzf ../tar-diff.tar.gz
1.492u 0.381s 0:01.48 126.3% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST# time tar -xzf ../tar-full.tar.gz etc/fstab
25.219u 2.581s 0:25.15 110.4% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST#
for tar with minimal features (no sparse file consideration):
We had to recreate state-1
as we needed to modify it in the previous test
devuan:/mnt/memdisk# rm tar* snapshot.file
devuan:/mnt/memdisk# cd state-1
devuan:/mnt/memdisk/state-1# time tar --listed-incremental=../snapshot.file -czSf ../tar-full.tar.gz *
tar: tmp/.ICE-unix/19789: socket ignored
tar: tmp/.X11-unix/X0: socket ignored
152.878u 10.155s 2:29.38 109.1% 0+0k 1520+0io 18pf+0w
devuan:/mnt/memdisk/state-1# dar -x ~denis/tmp/diff --fsa-scope none -u "*" -w -q
devuan:/mnt/memdisk/state-1# time tar --listed-incremental=../snapshot.file -czSf ../tar-diff.tar.gz *
6.369u 0.752s 0:06.55 108.5% 0+0k 3992+0io 16pf+0w
devuan:/mnt/memdisk/state-1# cd ..
devuan:/mnt/memdisk# ls -l tar* snap*
-rw-r--r-- 1 root root 3350870 Dec 3 17:29 snapshot.file
-rw-r--r-- 1 root root 44604194 Dec 3 17:29 tar-diff.tar.gz
-rw-r--r-- 1 root root 1546226992 Dec 3 17:27 tar-full.tar.gz
devuan:/mnt/memdisk# rm -rf DST
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# cd DST
devuan:/mnt/memdisk/DST# time tar -xzSf ../tar-full.tar.gz
27.331u 7.774s 0:26.27 133.6% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST# time tar -xzSf ../tar-diff.tar.gz
1.547u 0.487s 0:01.50 134.6% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST# time tar -xzSf ../tar-full.tar.gz etc/fstab
25.068u 2.565s 0:25.00 110.4% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST#
for tar with advanced features (sparse file consideration -S option
):
We evaluate here ciphering and deciphering performance. To compare on the same base we use the following parameters:
The content that will be backed up is a copy of /usr
directory tree. We will measure:
devuan:/mnt/memdisk# mkdir SRC
devuan:/mnt/memdisk# cp --preserve -r /usr SRC
devuan:/mnt/memdisk# time dar -c backup -K "aes256:hello world!" --kdf-param 100000:sha1 -R SRC -q -at -1 0
9.213u 3.245s 0:05.38 231.4% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# time dar -x backup -K "hello world!" -R DST -q
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
4.481u 2.628s 0:03.75 189.3% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# rm -rf DST/usr
devuan:/mnt/memdisk# time dar -x backup -K "hello world!" -R DST -q -g usr/bin/diff
Warning, the archive backup has been encrypted. A wrong key is not possible to detect, it would cause DAR to report the archive as corrupted
0.419u 0.025s 0:00.42 102.3% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk#
For dar the operation took:
dar is twice as fast to decipher as to cipher the whole archive, while restoring a particular file is almost immediate. By default, dar uses argon2 for KDF, which is the most secure algorithm as of year 2020 to derive a key, but we had to adapt to openssl used with tar, which does not (yet) support this algorithm.
To avoid plain-text attacks, a variable-length elastic buffer containing random data is encrypted with the rest of the backed up files at the beginning and at the end of the backup. This has some performance penalties (time to generate and time to cipher/decipher) and explains why two identical invocations of dar produce backups of different sizes and execution times:
devuan:/mnt/memdisk# time dar -c backup -K "aes256:hello world!" -at -1 0 -R SRC -q -w
9.782u 3.413s 0:06.28 210.0% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# la backup.1.dar
-rw-r--r-- 1 root root 1572706497 Nov 9 14:50 backup.1.dar
devuan:/mnt/memdisk# time dar -c backup -K "aes256:hello world!" -at -1 0 -R SRC -q -w
9.173u 2.845s 0:05.50 218.3% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk# la backup.1.dar
-rw-r--r-- 1 root root 1572655217 Nov 9 14:50 backup.1.dar
devuan:/mnt/memdisk#
rsync has no way to store the backup ciphered. Testing tar directly now:
tar has no support for ciphering, though it seems that some use openssl to work around this restriction. To measure the execution time we have to create a script that pipes tar and openssl, so we can measure the execution time of this script as a whole. There is thus one script for the backup and one for the restoration with tar+openssl.
devuan:/mnt/memdisk# cat > tar.backup
#!/bin/bash
if [ -z "$1" ] ; then
echo "usage: $0 <backup name> [ <file or dir> ]"
exit 1
fi
tar -cf - "$2" | openssl enc -e -aes256 -out "$1" -pbkdf2 -iter 100000 -salt -pass pass:"hello world!"
devuan:/mnt/memdisk#
devuan:/mnt/memdisk# chmod u+x tar.backup
devuan:/mnt/memdisk# cd SRC
devuan:/mnt/memdisk/SRC# time ../tar.backup ../backup.tar.crypted usr
3.954u 2.498s 0:04.69 137.3% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/SRC# cd ..
devuan:/mnt/memdisk# ls -l backup.tar.crypted
-rw-r--r-- 1 root root 1603594272 Nov 9 13:33 backup.tar.crypted
devuan:/mnt/memdisk#
devuan:/mnt/memdisk# cat > tar.restore
#!/bin/bash
if [ -z "$1" ] ; then
echo "usage: $0 <tar.crypted file> [<file or dir>]"
exit 1
fi
openssl enc -d -aes256 -in "$1" -pbkdf2 -iter 100000 -salt -pass pass:"hello world!" | tar -x "$2"
devuan:/mnt/memdisk# chmod u+x tar.restore
devuan:/mnt/memdisk# rm -rf DST
devuan:/mnt/memdisk# mkdir DST
devuan:/mnt/memdisk# cd DST
devuan:/mnt/memdisk/DST# time ../tar.restore ../backup.tar.crypted
1.807u 2.821s 0:02.70 171.1% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST#
devuan:/mnt/memdisk/DST# rm -rf usr
devuan:/mnt/memdisk/DST# time ../tar.restore ../backup.tar.crypted usr/bin/diff
1.336u 1.428s 0:01.79 153.6% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/DST# find
.
./usr
./usr/bin
./usr/bin/diff
devuan:/mnt/memdisk/DST#
For tar the operation took:
tar, like dar, also takes twice as long to cipher as to decipher; this seems to be related to the algorithm itself. tar is a bit faster than dar but lacks protection against plain-text attacks: the generated encrypted backups have the exact same size, to the byte, which means the block boundaries and the tar file internal structure always lie at the same file offsets for a given content:
devuan:/mnt/memdisk/SRC# time ../tar.backup ../backup.tar.crypted usr
4.112u 2.343s 0:04.72 136.6% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/SRC# ls -l ../bac
backup.1.dar backup.tar.crypted
devuan:/mnt/memdisk/SRC# ls -l ../backup.tar.crypted
-rw-r--r-- 1 root root 1603594272 Nov 9 14:56 ../backup.tar.crypted
devuan:/mnt/memdisk/SRC# time ../tar.backup ../backup.tar.crypted usr
3.952u 2.564s 0:04.79 135.9% 0+0k 0+0io 0pf+0w
devuan:/mnt/memdisk/SRC# ls -l ../backup.tar.crypted
-rw-r--r-- 1 root root 1603594272 Nov 9 14:56 ../backup.tar.crypted
devuan:/mnt/memdisk/SRC#
The following scripts are also available for download from the directory
#!/bin/bash
# Builds a very small directory in two successive steps.
#
# usage: <this script> <dir> {phase1 | phase2}
#   phase1  creates <dir> with A.txt and B.txt; refuses to run if <dir>
#           already exists
#   phase2  removes A.txt from an existing <dir> and adds C.txt
#
# Exit status: 1 when an argument is missing, 2 when the phase is unknown
# or <dir> is not in the state the requested phase expects.

target="$1"
step="$2"

# first step: the directory must not pre-exist
create_initial_content() {
  if [ -e "$target" ] ; then
    echo "$target exists, remove it first"
    exit 2
  fi
  mkdir "$target"
  echo "Hello World!" > "$target/A.txt"
  echo "Bonjour tout le monde !" > "$target/B.txt"
}

# second step: one file disappears, another one shows up
alter_content() {
  if [ ! -d "$target" ] ; then
    echo "$target does not exist or is not a directory, run phase1 first"
    exit 2
  fi
  rm -f "$target/A.txt"
  echo "Buongiorno a tutti !" > "$target/C.txt"
}

if [ -z "$target" ] || [ -z "$step" ] ; then
  echo "usage: $0 <dir> {phase1 | phase2}"
  exit 1
fi

if [ "$step" = "phase1" ] ; then
  create_initial_content
elif [ "$step" = "phase2" ] ; then
  alter_content
else
  echo "unknown phase"
  exit 2
fi
#!/bin/bash
# Touches the given file over and over, without end; the loop only stops
# when the script is interrupted.
#
# usage: <this script> <filename>
#   <filename>  file passed to touch at each iteration
#
# Exit status: 1 when no filename is given.

if [ -z "$1" ] ; then
  echo "usage: $0 <filename>"
  exit 1
fi

# the shell builtin is used instead of /bin/true: it does not depend on
# where the system installs the true binary and avoids running an extra
# process at each iteration
while true ; do
  touch "$1"
done
#!/bin/bash
# Inverts a single bit of a file, in place.
#
# usage: <this script> <offset in bit> <file>
#   <offset in bit>  zero-based position of the bit counted from the start
#                    of the file; inside a byte, bit 0 is the least
#                    significant one (the mask is 1 << bit-in-byte)
#   <file>           file to modify; it must already contain the targeted
#                    byte
#
# Requires xxd.
# Exit status: 1 on usage error or invalid offset, 2 when the targeted byte
# cannot be read.

if [[ -z "$1" || -z "$2" ]] ; then
  echo "usage: $0 <offset in bit> <file>"
  echo "flip the bit of the file located at the provided offset"
  exit 1
fi

offbit="$1"
file="$2"

# only plain non-negative decimal numbers are accepted, anything else
# would be evaluated as an arithmetic expression below
case "$offbit" in
  *[!0-9]*)
    echo "invalid bit offset: $offbit" >&2
    exit 1
    ;;
esac

# force base 10 so an offset written with leading zeros is not read as octal
offbit=$(( 10#$offbit ))
offbyte=$(( offbit / 8 ))
bitinbyte=$(( offbit % 8 ))

# nothing is returned when the file cannot be read or when the byte lies
# past its end; stop there instead of writing a made-up byte to the file
readbyte=$(xxd -s "$offbyte" -p -l 1 "$file")
if [[ -z "$readbyte" ]] ; then
  echo "cannot read byte $offbyte of $file" >&2
  exit 2
fi

mask=$(( 1 << bitinbyte ))
newbyte=$(( 0x$readbyte ^ mask ))
hexanewbyte=$(printf "%.2x" "$newbyte")

# xxd in revert mode writes the new byte back at the same byte offset
echo "$hexanewbyte" | xxd -p -l 1 -s "$offbyte" -r - "$file"
#!/bin/bash
# Creates a small directory tree gathering many kinds of inodes and
# attributes: zeroed, random and sparse files, valid and broken symlinks,
# a named pipe, char and block device nodes, a unix socket, hard links,
# ownership changes, an ACL, an Extended Attribute and filesystem specific
# attributes.
#
# usage: <this script> <directory>
#   <directory>  tree to create, it must not exist yet
#
# Requires dar, setfacl, setfattr and chattr.
# NOTE(review): mknod, chown and chattr presumably need root privileges.
# Exit status: 1 on usage error, pre-existing directory, missing dar,
# failure to create or enter the directory, or failure to set the ACL /
# Extended Attribute.

if [ -z "$1" ] ; then
  echo "usage: $0 <directory>"
  exit 1
fi

if [ -e "$1" ] ; then
  echo "$1 already exists, remove it or use another directory name"
  exit 1
fi

if ! dar -V > /dev/null ; then
  echo "need dar to copy unix socket to the test tree"
  exit 1
fi

# stop here rather than populating the current directory when the target
# cannot be created or entered
mkdir "$1" || exit 1
cd "$1" || exit 1

# creating
mkdir "SUB"
dd if=/dev/zero of=plain_zeroed bs=1024 count=1024
dd if=/dev/urandom of=random bs=1024 count=1024
# a single byte written after a seek of 10239999 bytes: the rest is a hole
dd if=/dev/zero of=sparse_file bs=1 count=1 seek=10239999
# the target is resolved relative to SUB, where no "random" entry exists
ln -s random SUB/symlink-broken
ln -s ../random SUB/symlink-valid
mkfifo pipe
mknod null c 3 1
mknod fd1 b 2 1
# /dev/log is backed up to stdout and restored from stdin under the current
# directory, which brings the unix socket here as dev/log
dar -c - -R / -g dev/log -N -Q -q | dar -x - --sequential-read -N -q -Q
ln sparse_file SUB/hard_linked_sparse_file
ln dev/log SUB/hard_linked_socket
ln pipe SUB/hard_linked_pipe

# modifying dates and permissions
# NOTE(review): the pauses presumably keep the different dates of the
# inodes apart from each other
sleep 2
chown nobody random
chown -h bin SUB/symlink-valid
chgrp -h daemon SUB/symlink-valid
sleep 2
echo hello >> random
sleep 2
cat < random > /dev/null

# adding Extended Attributes, assuming the filesystem has the user_xattr
# and acl options set; the exit must not sit inside a ( ) subshell, else
# only the subshell ends and the script carries on after the failure
if ! setfacl -m u:nobody:rwx plain_zeroed || ! setfattr -n "user.hello" -v "hello world!!!" plain_zeroed ; then
  echo "FAILED TO CREATE EXTENDED ATTRIBUTES"
  exit 1
fi

# adding filesystem specific attributes
chattr +dis plain_zeroed
#!/bin/bash
# Flips one bit of a file, then sets the access and modification dates of
# the file back to what they were before the change.
#
# usage: <this script> <filename> [<bit offset>]
#   <filename>    file to alter
#   <bit offset>  bit to flip, given as is to ./bitflip (2 when omitted)
#
# ./bitflip is looked up in the current directory. Relies on the -c option
# of GNU stat and on touch -d accepting the dates stat prints.
# Exit status: 1 on usage error or when the dates of the file cannot be
# read.

if [ -z "$1" ] ; then
  echo "usage: $0 <filename> [<bit offset>]"
  echo "modify one bit and hide the change"
  exit 1
fi

# %x and %y print the whole access / modification dates, UTC offset
# included; extracting them from the default stat output by looking for a
# '+' sign returned nothing in time zones having a negative UTC offset
atime=$(stat -c '%x' "$1") || exit 1
mtime=$(stat -c '%y' "$1") || exit 1

bitoffset="${2:-2}"

./bitflip "$bitoffset" "$1"

# the first call sets both dates to the saved modification date, the second
# one then puts the saved access date back
touch -d "$mtime" "$1"
touch -a -d "$atime" "$1"