\contentsline {section}{\numberline {0.1}Status Of This Document}{5} \contentsline {chapter}{\numberline {1}Overview}{6} \contentsline {section}{\numberline {1.1}Disorganized List of Features}{6} \contentsline {section}{\numberline {1.2}Where to get it}{7} \contentsline {section}{\numberline {1.3}Support}{7} \contentsline {section}{\numberline {1.4}Contributing}{7} \contentsline {chapter}{\numberline {2}Goals}{8} \contentsline {section}{\numberline {2.1}First Phase Goals}{8} \contentsline {subsection}{\numberline {2.1.1}Backup Server}{8} \contentsline {section}{\numberline {2.2}Second Phase Goals}{9} \contentsline {subsection}{\numberline {2.2.1}`Transparent' or `Overlay' Filesystem}{9} \contentsline {subsection}{\numberline {2.2.2}Quota Support}{9} \contentsline {subsection}{\numberline {2.2.3}Variable-length Hashing}{10} \contentsline {section}{\numberline {2.3}Non-Goals}{10} \contentsline {subsection}{\numberline {2.3.1}Source Code Repository}{10} \contentsline {chapter}{\numberline {3}Terminology}{11} \contentsline {section}{\numberline {3.1}Files}{11} \contentsline {section}{\numberline {3.2}Host Filesystem or Physical Filesystem}{11} \contentsline {chapter}{\numberline {4}System Architecture}{12} \contentsline {chapter}{\numberline {5}Database Operations}{13} \contentsline {section}{\numberline {5.1}Metadata Operations}{13} \contentsline {subsection}{\numberline {5.1.1}POSIX Metadata Operations}{13} \contentsline {subsection}{\numberline {5.1.2}{\tt fakefs}{} Metadata Operations}{14} \contentsline {section}{\numberline {5.2}Data Operations}{14} \contentsline {subsection}{\numberline {5.2.1}POSIX Data Operations}{14} \contentsline {subsection}{\numberline {5.2.2}{\tt fakefs}{} Data Operations}{15} \contentsline {chapter}{\numberline {6}Berkeley DB: The Least of the Evil}{16} \contentsline {section}{\numberline {6.1}DB: Database Babysitting}{16} \contentsline {section}{\numberline {6.2}Running out of Space}{16} \contentsline {section}{\numberline {6.3}If programmers built cars\dots }{17} \contentsline {section}{\numberline {6.4}Interactions between Transaction and non-Transaction Environment}{17} \contentsline {section}{\numberline {6.5}{\tt LD\_{}PRELOAD}{} Hacking}{17} \contentsline {chapter}{\numberline {7}Database Contents and Organization}{19} \contentsline {section}{\numberline {7.1}Tactics}{19} \contentsline {section}{\numberline {7.2}Physical Filesystem Requirements and Limitations}{19} \contentsline {section}{\numberline {7.3}Non-Limitations}{20} \contentsline {section}{\numberline {7.4}Directory Structure}{20} \contentsline {subsection}{\numberline {7.4.1}{\tt db}}{20} \contentsline {subsection}{\numberline {7.4.2}{\tt log}}{20} \contentsline {subsection}{\numberline {7.4.3}{\tt data}}{20} \contentsline {subsubsection}{{\tt data/main}}{20} \contentsline {subsubsection}{{\tt data/plugins}}{20} \contentsline {subsection}{\numberline {7.4.4}{\tt blobs}}{20} \contentsline {section}{\numberline {7.5}Types}{21} \contentsline {subsection}{\numberline {7.5.1}Inode ({\tt I})}{21} \contentsline {subsection}{\numberline {7.5.2}Epoch ({\tt E})}{21} \contentsline {subsection}{\numberline {7.5.3}$(Inode, Epoch)${}}{21} \contentsline {subsection}{\numberline {7.5.4}Hash ({\tt H})}{22} \contentsline {subsection}{\numberline {7.5.5}Stat ({\tt S})}{22} \contentsline {subsection}{\numberline {7.5.6}Reference ({\tt R})}{23} \contentsline {section}{\numberline {7.6}Tables}{23} \contentsline {subsection}{\numberline {7.6.1}Ownership Tables}{23} \contentsline {subsubsection}{{\tt masters}}{23} \contentsline {subsubsection}{{\tt slaves}}{23} \contentsline {subsubsection}{{\tt clients}}{24} \contentsline {subsubsection}{{\tt cache-closed}}{24} \contentsline {subsubsection}{{\tt cache-open}}{24} \contentsline {subsection}{\numberline {7.6.2}Namespace Tables}{24} \contentsline {subsubsection}{{\tt roots}}{24} \contentsline {subsubsection}{{\tt pie-in}}{24} \contentsline {subsubsection}{{\tt ie-pin}}{25} \contentsline {subsection}{\numberline {7.6.3}Other Tables}{25} \contentsline {subsubsection}{{\tt freei}}{25} \contentsline {paragraph}{F (Free)}{25} \contentsline {paragraph}{R (Reserved)}{25} \contentsline {subsubsection}{{\tt epochs}}{25} \contentsline {subsubsection}{{\tt misc}}{25} \contentsline {paragraph}{Key: Last-Inode}{26} \contentsline {paragraph}{Key: FirstGC}{26} \contentsline {paragraph}{Key: LastGC}{26} \contentsline {section}{\numberline {7.7}Cache}{26} \contentsline {subsection}{\numberline {7.7.1}{\tt inode}}{26} \contentsline {subsection}{\numberline {7.7.2}{\tt hash}}{26} \contentsline {chapter}{\numberline {8}File Insertion}{27} \contentsline {subsection}{\numberline {8.0.3}Score Objects}{29} \contentsline {subsection}{\numberline {8.0.4}Plan Objects}{29} \contentsline {subsection}{\numberline {8.0.5}Representation Search}{29} \contentsline {chapter}{\numberline {9}Configuration}{31} \contentsline {section}{\numberline {9.1}{\tt fakefs.conf} configuration file}{31} \contentsline {section}{\numberline {9.2}Non-Extensions}{31} \contentsline {subsection}{\numberline {9.2.1}{\tt source} {\sl CONFIG}}{31} \contentsline {subsection}{\numberline {9.2.2}{\tt load} {\sl PLUGIN}}{31} \contentsline {subsection}{\numberline {9.2.3}{\tt plugin} {\sl PLUGIN} {\tt command\dots {}}}{31} \contentsline {subsection}{\numberline {9.2.4}{\tt umask} {\sl MASK}}{32} \contentsline {subsection}{\numberline {9.2.5}{\tt dirhash} {\sl DEPTH SIZE}}{32} \contentsline {subsection}{\numberline {9.2.6}{\tt paranoid} {\sl WHAT ON/OFF}}{32} \contentsline {subsubsection}{{\tt paranoid storage} {\sl ON/OFF}}{32} \contentsline {subsubsection}{{\tt paranoid retrieval} {\sl ON/OFF}}{32} \contentsline {subsubsection}{{\tt paranoid duplicates} {\sl ON/OFF}}{32} \contentsline {section}{\numberline {9.3}Garbage Collection Policy}{33} \contentsline {subsection}{\numberline {9.3.1}Garbage Collection Scheduling Policies}{33} \contentsline {subsubsection}{Resource-Demand Garbage Collection Scheduling}{33} \contentsline {subsubsection}{Interval Garbage Collection Scheduling}{33} \contentsline {subsubsection}{Manually Initiated Garbage Collection Scheduling}{33} \contentsline {subsection}{\numberline {9.3.2}Garbage Collection Coverage Policies}{34} \contentsline {subsubsection}{Nothing is Garbage}{34} \contentsline {subsubsection}{Everything is Garbage}{34} \contentsline {subsubsection}{Only Superceded Inodes are Garbage}{34} \contentsline {section}{\numberline {9.4}Hash Functions}{34} \contentsline {subsection}{\numberline {9.4.1}{\tt hash algorithm} {\sl FUNCTION}}{34} \contentsline {subsection}{\numberline {9.4.2}{\tt hash length} {\sl LENGTH}}{34} \contentsline {section}{\numberline {9.5}File Transformation, Storage, and Retrieval}{34} \contentsline {subsection}{\numberline {9.5.1}{\tt transform} {\sl ALGORITHM INPUT-TYPE OUTPUT-TYPE}}{35} \contentsline {subsection}{\numberline {9.5.2}{\tt endpoints} {\sl INPUT-TYPE OUTPUT-TYPE}}{35} \contentsline {subsection}{\numberline {9.5.3}{\tt storage} {\sl METHOD}}{35} \contentsline {section}{\numberline {9.6}Cache Configuration}{35} \contentsline {subsection}{\numberline {9.6.1}{\tt cache directory} {\sl DIRECTORY}}{35} \contentsline {subsection}{\numberline {9.6.2}{\tt cache size} {\sl SIZE}}{36} \contentsline {chapter}{\numberline {10}Storage and Retrieval Methods}{37} \contentsline {section}{\numberline {10.1}{\tt berkeley}}{37} \contentsline {section}{\numberline {10.2}{\tt file}}{37} \contentsline {section}{\numberline {10.3}External Reference}{37} \contentsline {chapter}{\numberline {11}Transform Algorithms}{39} \contentsline {section}{\numberline {11.1}{\tt zlib} and {\tt bzlib}}{39} \contentsline {section}{\numberline {11.2}{\tt filter} Meta-Transform}{39} \contentsline {subsection}{\numberline {11.2.1}Compression Filter Transforms}{40} \contentsline {subsection}{\numberline {11.2.2}Encryption Filter Transforms}{40} \contentsline {section}{\numberline {11.3}Delta Transforms}{40} \contentsline {subsection}{\numberline {11.3.1}{\tt delta} Transform}{40} \contentsline {section}{\numberline {11.4}Mux/Demux Transforms}{41} \contentsline {subsection}{\numberline {11.4.1}Split Transform}{41} \contentsline {chapter}{\numberline {12}{\tt fakedmd}{}: Database Manager Daemon}{42} \contentsline {section}{\numberline {12.1}Startup Procedure}{42} \contentsline {subsection}{\numberline {12.1.1}{\tt libfakefs} launches {\tt fakedmd}{}}{42} \contentsline {subsection}{\numberline {12.1.2}{\tt fakedmd}{} acquires {\tt fakefs}{} master exclusive lock}{42} \contentsline {subsection}{\numberline {12.1.3}Berkeley DB Startup}{42} \contentsline {subsection}{\numberline {12.1.4}Cache Startup}{43} \contentsline {subsection}{\numberline {12.1.5}Plugin Startup}{43} \contentsline {subsubsection}{{\tt file} Storage Method Cleanup: Added Files}{43} \contentsline {subsubsection}{{\tt file} Storage Method Cleanup: Deleted Files}{44} \contentsline {chapter}{\numberline {13}{\tt libfakefs}: Application Interface}{46} \contentsline {chapter}{\numberline {14}{\tt LD\_{}PRELOAD}{}}{47} \contentsline {section}{\numberline {14.1}File-Descriptor Juggling}{47} \contentsline {section}{\numberline {14.2}{\tt fakefsd}: Database Proxy Server}{47} \contentsline {section}{\numberline {14.3}{\tt fork}, {\tt vfork}, {\tt clone}, {\tt pthread\_{}create}, and Friends}{48} \contentsline {section}{\numberline {14.4}{\tt rename} Optimization}{48} \contentsline {section}{\numberline {14.5}What to do about {\tt statfs}?}{48} \contentsline {subsection}{\numberline {14.5.1}Complications of {\tt fakefs}{}}{48} \contentsline {subsection}{\numberline {14.5.2}Proposals}{50} \contentsline {chapter}{\numberline {15}{\tt fakefs-nfsd}: NFS Server}{51} \contentsline {chapter}{\numberline {16}Garbage Collection}{52} \contentsline {section}{\numberline {16.1}{\tt fakegcd}: Garbage Collector Daemon}{52} \contentsline {section}{\numberline {16.2}Ownership of Data}{52} \contentsline {chapter}{\numberline {17}FIXMEs}{53}