From dc526eaef000dbdda239cf7651bba32504b37983 Mon Sep 17 00:00:00 2001 From: Manuel Canales Esparcia Date: Sun, 9 Apr 2006 08:49:42 +0000 Subject: [PATCH] Added original farce-002 scripts. --- extras/farce | 874 ++++++++++++++++++++++++++++++++++++++++++++++++ extras/filelist | 161 +++++++++ 2 files changed, 1035 insertions(+) create mode 100755 extras/farce create mode 100755 extras/filelist diff --git a/extras/farce b/extras/farce new file mode 100755 index 0000000..5c9727d --- /dev/null +++ b/extras/farce @@ -0,0 +1,874 @@ +#!/bin/bash + +# FARCE: Farce Assists Rebuild Comparison Evaluation ;) +# +# to answer the question "can it rebuild itself?" +# +# We expect four arguments - first directory path, filelist +# containing the files in this directory which we wish to compare, +# second directory path, filelist for second directory. +# +# Yes, we could just compare everything in each tree, but the +# filelist script knows about files it can reasonably ignore, +# and this also allows us to build a sytem, boot it and get a +# list of files, build a full desktop environment, and only then +# build and boot the "can it build itself" test system and get +# _its_ filelist. +# +# What this script aims to do: +# ____________________________ +# +# First, report files not in both builds. +# +# Then, confirm symlinks point to same targets. +# +# After that, compare individual files - +# if different, run the file name through 'expected' +# to pick out files that are unlikely to match (logs, +# pids, fstab [assumes '/' is a different device each time], +# count these as 'expected'. +# +# For whatever is left, check the file type - ar archives +# have their members extraced and compared (every member has +# a timestamp), gzipped files are compared beyond their +# timestamp, binaries, at least those using shared libs or +# which are shared objects, are copied and subjected to +# --strip-debug. If files match at this stage, count them as +# 'accepted'. +# +# As a last step for any file that doesn't match, copy it +# through some perl regexps to "process" it (convert any +# date, time, kernel-version information from standard formats +# into tokens, then see if the tokensi match. +# +# For details of the regexps, see the tokenize function. +# Those files that match after this are also counted as +# 'accepted'. Note that I don't always start from the kernel +# version that I'm going to build, so this copes with e.g. perl +# files that hardcode the kernel version. +# +# We now have files that don't match. A few of these seem to be +# common to all builds - some (members of) c++ libraries or ar +# archives, a few programs which perhaps use some sort of c++ code). +# The file name # is passed to the 'failure' function - these +# recognized filenames are labelled as 'predictable FAIL:', +# anything else is labelled as 'unexpected FAIL:'. +# +# output: +# stderr - files only in one of the builds, failure messages, +# and totals. +# +# farce-results - more details, including which files were treated +# as expected differences, files where neither copy could be read, +# files treated as accepted, with the reason (and member for ar +# archives). This data is typically up to 100 characters wide - +# sometimes it's a bit more, but it doesn't wrap too badly in a +# 100 character xterm. +# +# farce-extras - diffs for the files, or members, that didn't +# match. This file is to establish new regexps for picking up +# date/time/kernel-version formats. +# +# farce-identical - the names of the files which are identical +# +# farce-substitutions - whenever using tokenizeanddiff results in a +# difference being accepted, for both versions diff the before and +# after versions to show what got changed. If the file is a binary, +# the output may still be hard to read. Note that I _know_ glibc +# version strings pass one of the regexps looking for a kernel version +# - since I expect you to use the same version of glibc for each +# build, this is not a problem. +# +# farce-differ - the names of the files which could not be treated +# as matching (whether or not I regard the failure as predictable) +# for possible input to ICA processing. +# +# Copyright (C) 2005, 2006 Ken Moffat +# +# All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or +# NON INFRINGEMENT. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + +VERSION="002" + +# variables for output files +RESULT=farce-results +EXTRAS=farce-extras +IDENTICAL=farce-identical +SUBS=farce-substitutions +DIFFER=farce-differ + +# documenting the variables +# C1, C2 temp files to hold disassembled code +# D1, D2 temp directories for extracting member of ar archive +# DIFF temp file for diffing the filelists +# F1, F2 temp files for tokenizeanddiff +# FTYPE obsolete, commented out +# M1, M2 temp files to hold members of an ar archive +# MEMBERS temp file to list members of ar archive +# OP1, OP2 the original $PWD, needed when extracting members of ar +# archives +# P1, P2 paths to first and second build +# S1, S2 temp files for shared objects + +# functions +function dohelp() { + echo "`basename $0`: compare trees of files from a build" + echo "and lists of the files they contained" + echo "" + echo "`basename $0` [ -help | -version ] || path1 list1 path2 list2" +} + +function emessage() { + # write a string to both stderr and $RESULT + echo "$@" >&2 + echo "$@" >&5 +} + +function expected() { + # if we expect it to differ because of its name, + # allow it and report, return true ; else return false + case $1 in + /boot/grub/menu.lst) + # just in case somebody puts this into the main filesystem + true;; + /etc/aliases.db) + # some sort of database for postfix, not parsable + true;; + /etc/blkid.tab) + # includes dev name for rootfs + true;; + /etc/fstab) + # fstab, e.g. ' / ' will differ + true;; + /etc/group*) + true;; + /etc/hosts) + # with dhcp client, I add current ip address to this in a hook + true;; + /etc/ld.so.*) + # .conf and .cache can vary, + # particularly if one system has a full build when I run this + true;; + /etc/lilo.conf|/etc/yaboot.conf) + # bootloader control, I assume grub will all be on a separate + true;; + /etc/mtab) + # at a minimum, different '/' + true;; + /etc/ntp.drift) + true;; + /etc/passwd*) + true;; + /etc/shadow*) + true;; + /etc/ssh/*key|/etc/ssh/*pub) + # openssh keys + true;; + /misc/*) + # where I put buildscripts (which mostly won't change) + # and stamps containing name/time/space which will differ in the times + true;; + /root/*) + # expect .bash_history etc to differ - if we can read them + true;; + /usr/bin/lynx) + # part of my inital builds, I guess this uses anonymous namespaces + true;; + /usr/include/c++/*/*/bits/stdc++.h.gch/*) + # precompiled headers + true;; + /usr/lib*/libstdc++.a|/usr/lib*/libstdc++.so*|/usr/lib*/libsupc++.a) + # probably, anonymous namespaces + # libstdc++.a, libstdc++.so.n.n.n, libsupc++.a + true;; + /usr/share/info/dir) + # if one system has had extra stuff built, this will likely be bigger + true;; + /usr/share/man/whatis) + # if one system has had extra stuff built, this will likely be bigger + true;; + /var/lib/locate/locatedb) + # if one system has had extra stuff built, this will likely be bigger + true;; + /var/lib/nfs/*) + # allow nfs bookkeeping + true;; + /var/log/*) + true;; + /var/run/utmp) + true;; + /var/spool/fcron*) + true;; + /var/state/*) + # allow dhcp leases + true;; + /var/tmp/random-seed) + true;; + # following start with wildcards + *Image*|*.PPCBoot*|*vmlinuz*|*lfskernel*) + # compressed kernels, sometimes just building at a different + # date/time is enough to change the length of them, because the + # long format date and time is part of the compressed data + true;; + *pid*) + # pids, including e.g. /var/spool/postfix/pid/* + true;; + *) + # nothing else is expected to be different + false;; + esac + if [ $? -eq 0 ]; then + message "expected difference in $1" + let expected=$expected+1 + case $TYPE in + AR) + let EXPAR=$EXPAR+1 + ;; + ELF) + let EXPELF=$EXPELF+1 + ;; + UNK) + let EXPUNK=$EXPUNK+1 + ;; + # so far, no other valid types, so don't accumulate them + *) + emessage "internal error, expected difference for $1 of type $TYPE not allowed" + exit 2 + ;; + esac + true + else + false + fi +} + +function failure() { + # first parm is filename or token + # second parm is the error message + # update the appropriate total + # and write to both stderr and the results + # by using emessage + + let different=$different+1 + case $TYPE in + AR) + let DIFAR=$DIFAR+1 + ;; + ELF) + let DIFELF=$DIFELF+1 + ;; + GZ) + let DIFGZ=$DIFGZ+1 + ;; + SYM) + let DIFSYM=$DIFSYM+1 + ;; + UNK) + let DIFUNK=$DIFUNK+1 + ;; + *) + emessage "internal error in failure() for TYPE $TYPE" + exit 2 + ;; + esac + test -f ${P1}$1 && echo $1 >&9 + emessage "FAIL: $2" +} + +function fatal() { + # unrecoverable error + echo $* + exit 1 +} + +function filetype() { + TYPE=`file ${P1}${FILE}` + case $TYPE in + *'current ar archive'*) + let TOTAR=$TOTAR+1 + TYPE=AR + ;; + *' ELF '*) + let TOTELF=$TOTELF+1 + TYPE=ELF + ;; + *'gzip compressed data'*) + let TOTGZ=$TOTGZ+1 + TYPE=GZ + ;; + *) + let TOTUNK=$TOTUNK+1 + TYPE=UNK + ;; + esac +} + +function message() { + # write a string to $RESULT + echo $* >&5 +} + +function onlyone() { + #report files only in one build + # text should go to both stderr and the results, + # but blank lines only go to the results + if [ $1 == '<' ]; then + emessage "File(s) only in the first build" + else + emessage "File(s) only in the second build" + fi + message "" + FILES=`cat $DIFF | grep "^$1" | cut -d ' ' -f 2` + for F in $FILES; do + emessage $F + let only=$only+1 + done + message "" +} + +# 'test' functions are called with three arguments: +# the two pathes and the filename +# - we know the file is of this type, so see if we +# can get it to match by reasonalbe means. +# if not, treat it as different. +# +# NB if pathes are absolute, we need to prefix them +# with the original $PWD to access the .a files +# +function testar() { + # ar archives include timestamps for the members, + # but diff doesn't show file timestamps unless the data differs + # put out a message to help locate which archive any messages + # about the members refer to. + + # try just stripping them U1,2 undebuggable + U1=`mktemp` || fatal "cannot create a temporary file" + U2=`mktemp` || fatal "cannot create a temporary file" + cp ${1}${3} $U1 + cp ${2}${3} $U2 + strip --strip-debug $U1 + strip --strip-debug $U2 + cmp -s $U1 $U2 + rm $U1 $U2 + if [ $? -eq 0 ]; then + let accepted=$accepted+1 + let ACCAR=$ACCAR+1 + message "archive $3 matches after strip --strip-debug" + return + fi + # rest of this function retained primarily for pathologically bad builds + # put out a message in the log to help identify which archive has issues. + message "examining ar archive $3" + D1=`mktemp -d` || fatal "cannot create a temporary directory" + D2=`mktemp -d` || fatal "cannot create a temporary directory" + cd $D1 + ar -x ${OP1}${1}${3} + cd $D2 + ar -x ${OP2}${2}${3} + cd + # diff the members - true means they match + diff -Na $D1 $D2 >/dev/null + if [ $? -eq 0 ]; then + message "accept: $3 after diffing the members" + let accepted=$accepted+1 + let ACCAR=$ACCAR+1 + else + # process individual members to eliminate date/time/kernel-version + # first, check the members are the same + M1=`mktemp` || fatal "cannot create a temporary file" + M2=`mktemp` || fatal "cannot create a temporary file" + cd $D1 + MEMBERS= + for F in *; do + MEMBERS="$MEMBERS $F" + done + cd + echo $MEMBERS | sort >$M1 + cd $D2 + MEMBERS= + for F in *; do + MEMBERS="$MEMBERS $F" + done + cd + echo $MEMBERS | sort >$M2 + cmp -s $M1 $M2 + if [ $? -ne 0 ]; then + # oh dear, different members + echo "list of members differs for archive $3" >&6 + diff $M1 $M2 >&6 + failure $3 "$3 list of members differs" + else + # members (names) are same, + # process each one + STATUS=0 + for M in $MEMBERS; do + #avoid firing up perl on matching members + cmp -s $D1/$M $D2/$M + if [ $? -ne 0 ]; then + tokenizeanddiff $D1/$M $D2/$M $FILE:$M + if [ $? -eq 0 ]; then + message "member $M matches after processing" + else + message "member $M DIFFERS after processing" + STATUS=1 + fi + fi + done + if [ $STATUS -eq 0 ]; then + let accepted=$accepted+1 + let ACCAR=$ACCAR+1 + else + let different=$different+1 + let DIFAR=$DIFAR+1 + echo $3 >&9 + emessage "FAIL: in $3" + fi + fi + rm $M1 $M2 + fi + rm -rf $D1 $D2 +} + +function testgzip() { + # bytes 4,5,6,7 are the timestamp, so ignore these + cmp -s -i 8 ${1}${3} ${2}${3} + if [ $? -eq 0 ]; then + message "accept: $3 after ignoring gzip timestamp" + let accepted=$accepted+1 + let ACCGZ=$ACCGZ+1 + else + failure $3 " $3 even after ignoring gzip timestamp" + fi +} + +function testso() { + # shared object - first try stripping it + # in fact, this now handles ALL ELF files + S1=`mktemp` || fatal "cannot create a temporary file" + S2=`mktemp` || fatal "cannot create a temporary file" + cp ${1}${3} $S1 + strip --strip-debug $S1 + cp ${2}${3} $S2 + strip --strip-debug $S2 + cmp -s $S1 $S2 + if [ $? -eq 0 ]; then + message "accept: $3 after --strip-debug" + let accepted=$accepted+1 + let ACCELF=$ACCELF+1 + else + tokenizeanddiff $S1 $S2 $3 + if [ $? -ne 0 ]; then + failure $3 " $3 differs after stripping and processing" + else + message "accept: $3 after --strip-debug and processing" + let accepted=$accepted+1 + let ACCELF=$ACCELF+1 + fi + fi + rm $S1 $S2 +} + +function tokenize() { + # use regexes to replace date/time/kernel-version text + # with tokens which may allow files to match even though + # they have hardcoded date/time/kernel-version. + # arguments are file to process, and where to put it. + # these regexes are somewhat long, and the order they + # are applied in is important (to stop short ones being + # used when a longer version would match). + # KV00 linux version date (e.g. as in the kernel itself) + # allow 2 or 3 groups of three alphas here - optional smp, with day, mon + # KV01 kernel version, including possible cpu details (that is for cdda2wav) + # KV02 just the version, in quotes e.g. "2.6.12.6" or '2.6.13', for perl stuff + # except that "|' gives me grif, so try a boundary + # also, it might need local version on the end, I really want + # quote2.\d+.\d+.{0,32}quote - it is the quotes that don't work. + # DT00 Day Mon .d+ hh:mm:ss TZN CCYY variations include non-caps and 'mon d' + # DT01 Mon .d+ CCYY hh:mm:ss + # DT02 hh:mm:ss Mon .d CCYY + # DT03 Mon .d CCYY + # DT04 Day Mon { ,d}d hh:mm:ss CCYY - for groff example postscript files + # (somewhat similar to DT00, but ' d' or ' dd' for day of month and no TZN ) + # DT05 hh:mm:ss + # DT06 ISO date using space as separator + # DT07 ISO date using dash as separator + # DT08 ISO date using slash as separator + # DT09 fullmonth (capitalised), day number, comma, 4-digit year (groff 1.18.1 ps) + # DT10 dd, fullmonth (capitalised), 4-digit year (groff 1.18.1 manpages) + # DT11 '(xample comma space digit(s) backslash ) in groff memef.ps which is + # quite clearly the day of the month when it was compiled, preceded by 'example' + # with something weird between the e and the x. + + if [ $# -ne 2 ]; then + fatal "tokenizing called with $# arguments : $*" + fi + + cat $1 | perl -p \ + -e 's/(L|l)inux.*\d\.\d\.\d+.* \#\d+( [A-Za-z][a-z]{2}){2,3} \d+ \d\d:\d\d:\d\d [A-Za-z]{3} \d{4}\b/%KV00%/g;' \ + -e 's/(L|l)inux( (\w|_)+)?(-| |_)\d\.\d(\.\d+){1,2}((-|_)?(\w|_)+)?( |\000)*/%KV01%/g;' \ + -e 's/\W2(\.\d+){2,3}(-|_)?((\w|_)+)?\s*\W/%KV02%/g;' \ + -e 's/\b([A-Za-z][a-z]{2} ){2}( |\d)?\d \d\d:\d\d:\d\d [A-Za-z]{3} \d{4}\b/%DT00%/g;' \ + -e 's/\b[A-Z][a-z]{2} ( |\d)\d \d{4} \d\d:\d\d:\d\d\b/%DT01%/g;' \ + -e 's/\b\d\d:\d\d:\d\d [A-Z][a-z]{2} ( |\d)\d \d{4}\b/%DT02%/g;' \ + -e 's/\b[A-Z][a-z]{2} ( |\d)\d \d{4}\b/%DT03%/g;' \ + -e 's/\b([A-Z][a-z]{2} ){2}( |\d)\d \d\d:\d\d:\d\d \d{4}/%DT04%/g;' \ + -e 's/\b\d\d:\d\d:\d\d\b/%DT05%/g;' \ + -e 's/\b\d{4} \d\d \d\d\b/%DT06%/g;' \ + -e 's/\b\d{4}-\d\d-\d\d\b/%DT07%/g;' \ + -e 's/\b\d{4}\/\d\d\/\d\d\b/%DT08%/g;' \ + -e 's/\b[A-Z][a-z]{2,} \d{1,2}, \d{4}/%DT09%/g;' \ + -e 's/\b\d\d [A-Z][a-z]{2,} \d{4}/%DT10%/g;' \ + -e 's/\(xample, \d{1,2}\\\)/%DT11%/g;' \ + >$2 +} + +function tokenizeanddiff() { + # Call tokenize for the inputs, then compare the results + # Input arguments are path/filename for old and new versions + # third parm is readable name (filename, or archivename:member) + # to help understand what is in the extras output. + # - sometimes called for files, but other times called for + # members of ar archives extracted into temporary directories + #message tokenizeanddiff called for $1 $2 $3 + F1=`mktemp` || fatal "cannot create a temporary file" + F2=`mktemp` || fatal "cannot create a temporary file" + tokenize $1 $F1 + tokenize $2 $F2 + + # actually, cmp is probably more efficient + # but for picking up the pieces it will be better to + # use diff to see what got through. + cmp -s $F1 $F2 + TOKENRESULT=$? + if [ $TOKENRESULT -ne 0 ]; then + echo "failure in $3..." >&6 + diff -a $F1 $F2 >&6 + rm $F1 $F2 + false + else + # show what we did + echo "substitutions for $3" >&8 + echo "build one" >&8 + diff -a $1 $F1 >&8 + echo "build two" >&8 + diff -a $2 $F2 >&8 + rm $F1 $F2 + true + fi +} + +function validateargs() { +# validate the arguments +BAD=0 +if ! [ -d $1 ]; then + echo "Error: first argument is not a directory" >&2 + let BAD=$BAD+1 +fi +NAME=`basename ${2%%-*}` +if [ $NAME != filelist ]; then + echo "Error: second argument is not a recognized filelist" >&2 + let BAD=$BAD+1 +fi +if ! [ -d $3 ]; then + echo "Error: third argument is not a directory" >&2 + let BAD=$BAD+1 +fi +NAME=`basename ${4%%-*}` +if [ $NAME != filelist ]; then + echo "Error: fourth argument is not a recognized filelist" >&2 + let BAD=$BAD+1 +fi +for I in $1 $2 $3 $4; do + if ! [ -r $I ]; then + echo "Error: cannot read $I" >&2 + let BAD=$BAD+1 + fi +done +if [ $1 == $3 ]; then + echo "Error: directory pathes are identical" >&2 + let BAD=$BAD+1 +fi +if [ $2 == $4 ]; then + echo "Error: filelist names are identical" >&2 + let BAD=$BAD+1 +fi +if [ $BAD -eq 0 ]; then + ARGS=valid +fi +} + +# Mainline +ARGS=unproven +OUTDIR= +if [ $# -eq 1 ]; then + case $1 in + -version|--version) + echo "`basename $0` version $VERSION" + exit 0 + ;; + -help|--help) + dohelp + exit 0 + ;; + esac +fi +if [ $1 = "--directory" ]; then + OUTDIR=$2 + shift 2 + grep '/$' $OUTDIR >/dev/null 2>&1 || OUTDIR=`echo $OUTDIR | sed 's%$%/%'` + echo "creating directory $OUTDIR" + mkdir -p $OUTDIR + if [ $? -ne 0 ]; then + echo "cannot mkdir $OUTDIR" + exit 1 + fi +fi +if [ $# -eq 4 ]; then + validateargs $* +fi +if ! [ $ARGS == valid ]; then + dohelp + fatal "`basename $0`: error in arguments" +fi + +# ok, we're happy, lets hit these files +exec 5>${OUTDIR}$RESULT +exec 6>${OUTDIR}$EXTRAS +exec 7>${OUTDIR}$IDENTICAL +exec 8>${OUTDIR}$SUBS +exec 9>${OUTDIR}$DIFFER + +>${OUTDIR}$RESULT +if [ $? -ne 0 ]; then + fatal "cannot write to ${OUTDIR}$RESULT" +fi + +emessage "will compare:" +emessage " first build at $1 with files listed in $2" +emessage "second build at $3 with files listed in $4" + +let accepted=0 +let different=0 +let expected=0 +let matched=0 +let only=0 +let predictable=0 +let unreadable=0 +let total=0 + +# break down the accepted +let ACCAR=0 +let ACCELF=0 +let ACCGZ=0 +let ACCUNK=0 + +# break down definitely different +let DIFAR=0 +let DIFELF=0 +let DIFGZ=0 +let DIFSYM=0 +let DIFUNK=0 + +# break down the expected differences +let EXPAR=0 +let EXPELF=0 +let EXPGZ=0 +let EXPUNK=0 + +# break down the identical files +let MATAR=0 +let MATELF=0 +let MATGZ=0 +let MATSYM=0 +let MATUNK=0 + +# break down how many of each type +let TOTAR=0 +let TOTELF=0 +let TOTGZ=0 +let TOTSYM=0 +let TOTUNK=0 + +# now identify differences between the two trees +DIFF=`mktemp` || fatal "cannot create a temporary file" +diff $2 $4 >$DIFF + +for RUN in '<' '>' ; do + grep -q "$RUN" $DIFF && onlyone "$RUN" +done + +rm $DIFF + +# and compare them +message "Results of file comparison:" +message "" + +# Strip any trailing slash from the path for tidyness, +# because the filenames all start with a slash. Unfortunately, +# unfortunately, '/' becomes empty, which breaks subroutines, +# so special case it. +# also, to process ar archives we need to extract them in temp +# directories - that means that after cd'ing we've broken any +# relative path, so save original pwd as necessary. +P1=`echo $1 | sed 's%/$%%'` +echo $1 | grep '^/' >/dev/null +if [ $? -ne 0 ]; then + # relative path + OP1=${PWD}/ + #echo "setting OP1 to $OP1" +else + OP1= + #echo "$1 is an absolute path" +fi +test -z "$P1" && P1='/' +P2=`echo $3 | sed 's%/$%%'` +echo $3 | grep '^/' >/dev/null +if [ $? -ne 0 ]; then + # relative path + OP2=${PWD}/ + #echo "setting OP2 to $OP2" +else + OP2= + #echo "$3 is an absolute path" +fi +test -z "$P2" && P2='/' + +echo "about to read $2" +while read FILE ; do +#echo "process $FILE" +#echo "test existence of ${P2}${FILE}" + # confirm it exists in second build + # we have already reported files only in one build + if [ -f ${P2}"${FILE}" ]; then + let total=$total+1 + # check we can read both of them + # or count as unreadable - I used to separate only-one-unreadable, + # but if you compre '/' and a _copy_ of /mnt/lfs that assumption + # breaks, so be less picky. + if ! [ -r "${P1}${FILE}" ] || ! [ -r "${P2}${FILE}" ]; then + message "cannot read one or both versions of $FILE" + let unreadable=$unreadable+1 + continue + fi + if [ -h "${P1}${FILE}" ]; then + # for symlink, look at what it points to + # exceptionally, do not call filetype + TYPE=SYM + let TOTSYM=$TOTSYM+1 + SL1=`ls -l "${P1}${FILE}" | awk '{ print $11 }'` + SL2=`ls -l "${P2}${FILE}" | awk '{ print $11 }'` + if [ "$SL1" = "$SL2" ]; then + echo "symlink $FILE matches for $SL1" >&5 + let matched=$matched+1 + let MATSYM=$MATSYM+1 + else + failure TARGET " symlink $FILE points to $SL1 and $SL2" + echo $FILE >&9 + fi + else + # regular file, start by typing it for accounting, + # then compare it + filetype ${P1}${FILE} + cmp -s "${P1}${FILE}" "${P2}${FILE}" + if [ $? -eq 0 ]; then + let matched=$matched+1 + case $TYPE in + AR) + let MATAR=$MATAR+1 + ;; + ELF) + let MATELF=$MATELF+1 + ;; + GZ) + let MATGZ=$MATGZ+1 + ;; + UNK) + let MATUNK=$MATUNK+1 + ;; + *) + echo "unexpected TYPE of $TYPE for $FILE" >&2 + exit 2 + ;; + esac + echo ${FILE} >&7 + else + # seems different, can we do better ? + # test if we expect it to differ + expected $FILE + if [ $? -ne 0 ]; then + case $TYPE in + GZ) + testgzip $P1 $P2 $FILE ;; + AR) + testar $P1 $P2 $FILE ;; + ELF) + testso $P1 $P2 $FILE ;; + *) + # long-stop - strip dates from text files + tokenizeanddiff "${P1}${FILE}" "${P2}${FILE}" "$FILE" + if [ $? -eq 0 ]; then + message "accepted $FILE after processing" + let accepted=$accepted+1 + let ACCUNK=$ACCUNK+1 + else + failure "$FILE" " $FILE is different" + fi + ;; + esac + fi + fi + fi + fi +done < $2 + +message "" +# write totals to stderr as well as the results file +emessage "$only files in only one of the builds" +emessage "$total files compared, of which" +emessage "$unreadable files could not be read, skipped" +emessage "$matched files are identical" +emessage "$expected files differed as expected" +emessage "$accepted files had allowable differences" +#emessage "$predictable files differed as they normally do" +emessage "$different files differed" + +# totals of different file types +emessage "" +emessage "$TOTAR ar archives" +emessage " of which $MATAR are identical" +emessage " of which $ACCAR are accepted after strip-debug or extracting, diffing, tokenizing" +emessage " of which $EXPAR differed as expected" +emessage " of which $DIFAR differed" +emessage "$TOTELF ELF executables or shared libraries" +emessage " of which $MATELF are identical" +emessage " of which $ACCELF are accepted after stripping and tokenizing" +emessage " of which $EXPELF differed as expected" +emessage " of which $DIFELF differed" +emessage "$TOTGZ gzipped files" +emessage " of which $MATGZ are identical" +emessage " of which $ACCGZ are accepted after comparing beyond timestamp" +emessage " of which $DIFGZ are different" +emessage "$TOTSYM symbolic links" +emessage " of which $MATSYM are identical" +emessage " of which $DIFSYM have different targets" +emessage "$TOTUNK other files" +emessage " of which $MATUNK are identical" +emessage " of which $ACCUNK are accepted after tokenizing" +emessage " of which $EXPUNK differed as expected" +emessage " of which $DIFUNK differed" + diff --git a/extras/filelist b/extras/filelist new file mode 100755 index 0000000..77d58ab --- /dev/null +++ b/extras/filelist @@ -0,0 +1,161 @@ +#!/bin/bash +# +# filelist - prepare a list of the files in a built system. +# This is for use in comparing two builds, to answer the +# question "can it build itself" ? Obviously, it can also be +# used to compare two partial builds to see what changed. +# +# Call this script with the path to the root of the system. +# If you are running the new system, the path is probably '/'. +# Alternatively, the system might be mounted at /mnt/lfs, or +# it might have been copied to ~/build1 or wherever. +# +# The output is a single file, filelist-CCYYMMDDhhmm +# e.g. filelist-200510052108 +# which contains a list of the files to compare. +# It excludes certain files which are not of interest (/tools, +# /cross-tools, /home) together with any mounted filesystems. +# +# you should run this as a regular user - this will cause files +# in e.g. /root to be ignored. +# +# I like to build a graphical desktop before using a new system +# to see if it can build itself. Filelist supports this by allowing +# you to list the files at any time. My normal process is: +# +# 1. Build an LFS system as normal. +# 2. Build some extras before booting (nfs client, ntp, openssh, +# lynx, dhcp client, etc). +# 3. Boot the new (first) system, set up users, run filelist. +# 4. Build X and whatever else I want to use. +# 5. Build the second system. +# 6. Build the same extras for the second system. +# 7. Boot the second system, to prove it works. +# 8. Reboot to the first system, mount the second system at /mnt/lfs, +# then run filelist on /mnt/lfs and run the comparison. +# +# Copyright (C) 2005, 2006 Ken Moffat +# +# All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or +# NON INFRINGEMENT. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + +VERSION="002" + +function error() { + echo "usage:" + echo " `basename $0` [--whole-build] path name for output file]" + echo " e.g. '/' or '/mnt/lfs' or 'build1'" + echo " use --whole-build to compare files from /tools or /cross-tools" + echo + echo "Error: $1" + exit 1 +} + + +# process the argument, if there is one +# normally, we exclude files in /tools and /cross-tools +EXCLUDE=true + +case "$1" in + +--whole-build) + # do NOT exclude cross-tools and tools + EXCLUDE= + shift + ;; +--*) + error "Bad option $1" + ;; + +esac + + +if [ $# -lt 1 ]; then + error "no argument" +elif [ $1 = "--version" ]; then + echo "$0 version $VERSION" + exit +elif [ $# -gt 2 ]; then + error "more than two arguments" +elif ! [ -d $1 ]; then + error "not a directory" +fi +if [ $# -eq 2 ]; then + OUTFILE=$2 + if [ -e $2 ]; then + echo "output $2 already exists" + exit + fi +else + NOW=`date +%Y%m%d%H%M` + OUTFILE=~/filelist-${NOW} +fi + +if [ "$1" == "/" ]; then + LOC=$1 +else + # ensure the path or mountpoint ends with a slash + # because of the seds after the 'find' + LOC=`echo $1 | sed 's%[^/]$%&/%'` +fi + +echo "will create file list in $OUTFILE" +if [ -f $OUTFILE ]; then + echo "refusing to overwrite $OUTFILE" + exit 1 +fi + +# check we can indeed do this +>$OUTFILE +if [ $? -ne 0 ]; then + echo "error, cannot write to $OUTFILE" + exit 1 +fi + +# Explanation of the find command: we exclude any filesystems mounted below +# this path (typically, you are looking at '/' so this excludes /proc /sys +# /dev). We only care about files, not directories. +# Exclusions - insert whatever we were given at the start of the pathes to +# exclude - this might mean we have '//' in them, but that shouldn't matter. +# /tools* /tools and also e.g. /tools.old if you rename an old version +# /cross-tools* similar, for cross-lfs +# /home/* - in case /home is not a separate filesystem +# /sources/* - where the book thinks you should keep sources +# /tmp/* - we really don't care about any junk left in here +# /misc/* - where I keep buildscripts and stamps +# we then sed it so that / or /mnt/lfs/ or whatever are all converted to '/'. + +# At one time, EXCLUDE contained the ! -path strings for cross-tools and tools +# but htat didn't work, so now it is just a marker to control this logic. +if [ -z "$EXCLUDE" ]; then + find $LOC -xdev -xtype f \ + ! -path "${LOC}home/*" \ + ! -path "${LOC}sources/*" ! -path "${LOC}tmp/*" \ + ! -path "${LOC}misc/*" | sed "s%^${LOC}%/%" | sort >$OUTFILE +else + find $LOC -xdev -xtype f \ + ! -path "${LOC}cross-tools*/*" ! -path "${LOC}tools/*" \ + ! -path "${LOC}home/*" \ + ! -path "${LOC}sources/*" ! -path "${LOC}tmp/*" \ + ! -path "${LOC}misc/*" | sed "s%^${LOC}%/%" | sort >$OUTFILE +fi + +exit + + +