diff options
52 files changed, 1265 insertions, 153 deletions
@@ -194,6 +194,7 @@ Philippe Bruhat <book@cpan.org> Ralf Thielow <ralf.thielow@gmail.com> <ralf.thielow@googlemail.com> Ramsay Jones <ramsay@ramsayjones.plus.com> <ramsay@ramsay1.demon.co.uk> René Scharfe <l.s.r@web.de> <rene.scharfe@lsrfire.ath.cx> +René Scharfe <l.s.r@web.de> Rene Scharfe Richard Hansen <rhansen@rhansen.org> <hansenr@google.com> Richard Hansen <rhansen@rhansen.org> <rhansen@bbn.com> Robert Fitzsimons <robfitz@273k.net> diff --git a/Documentation/Makefile b/Documentation/Makefile index 2415e0d657..471bb29725 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -67,6 +67,7 @@ SP_ARTICLES += howto/maintain-git API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt))) SP_ARTICLES += $(API_DOCS) +TECH_DOCS += technical/hash-function-transition TECH_DOCS += technical/http-protocol TECH_DOCS += technical/index-format TECH_DOCS += technical/pack-format diff --git a/Documentation/RelNotes/2.15.0.txt b/Documentation/RelNotes/2.15.0.txt index 7b8eeb52b6..748f3c9980 100644 --- a/Documentation/RelNotes/2.15.0.txt +++ b/Documentation/RelNotes/2.15.0.txt @@ -99,6 +99,10 @@ UI, Workflows & Features operations in the same repository. The new "--no-optional-locks" option can be passed to Git to disable them. + * "git for-each-ref --format=..." learned a new format element, + %(trailers), to show only the commit log trailer part of the log + message. + Performance, Internal Implementation, Development Support etc. @@ -230,6 +234,11 @@ Performance, Internal Implementation, Development Support etc. throughout the life of the program have been marked with UNLEAK marker to help the leak checkers concentrate on real leaks. + * Plans for weaning us off of SHA-1 have been documented. + + * A new "oidmap" API has been introduced and oidset API has been + rewritten to use it. + Also contains various documentation updates and code clean-ups. @@ -485,6 +494,9 @@ Fixes since v2.14 there. 
(merge bea4dbeafd tb/ref-filter-empty-modifier later to maint). + * An ancient bug that made Git misbehave with creation/renaming of + refs has been fixed. + * Other minor doc, test and build updates and code cleanups. (merge f094b89a4d ma/parse-maybe-bool later to maint). (merge 39b00fa4d4 jk/drop-sha1-entry-pos later to maint). @@ -516,3 +528,8 @@ Fixes since v2.14 (merge a9155c50bd sb/branch-avoid-repeated-strbuf-release later to maint). (merge f777623514 ks/branch-tweak-error-message-for-extra-args later to maint). (merge 33f3c683ec ks/verify-filename-non-option-error-message-tweak later to maint). + (merge b3ea7dd32d jk/sha1-loose-object-info-fix later to maint). + (merge 2720f6db5d rs/fsck-null-return-from-lookup later to maint). + (merge 99b7b687a6 rs/rs-mailmap later to maint). + (merge 7823655082 tb/complete-describe later to maint). + (merge 7cbbf9d6a2 ls/filter-process-delayed later to maint). diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt index cbd0a6212a..1d420e4cde 100644 --- a/Documentation/git-for-each-ref.txt +++ b/Documentation/git-for-each-ref.txt @@ -218,11 +218,15 @@ and `date` to extract the named component. The complete message in a commit and tag object is `contents`. Its first line is `contents:subject`, where subject is the concatenation of all lines of the commit message up to the first blank line. The next -line is 'contents:body', where body is all of the lines after the first +line is `contents:body`, where body is all of the lines after the first blank line. The optional GPG signature is `contents:signature`. The first `N` lines of the message is obtained using `contents:lines=N`. Additionally, the trailers as interpreted by linkgit:git-interpret-trailers[1] -are obtained as 'contents:trailers'. +are obtained as `trailers` (or by using the historical alias +`contents:trailers`). Non-trailer lines from the trailer block can be omitted +with `trailers:only`. 
Whitespace-continuations can be removed from trailers so +that each trailer appears on a line by itself with its full content with +`trailers:unfold`. Both can be used together as `trailers:unfold,only`. For sorting purposes, fields with numeric values sort in numeric order (`objectsize`, `authordate`, `committerdate`, `creatordate`, `taggerdate`). diff --git a/Documentation/technical/api-argv-array.txt b/Documentation/technical/api-argv-array.txt index cfc063018c..870c8edbfb 100644 --- a/Documentation/technical/api-argv-array.txt +++ b/Documentation/technical/api-argv-array.txt @@ -8,7 +8,7 @@ always NULL-terminated at the element pointed to by `argv[argc]`. This makes the result suitable for passing to functions expecting to receive argv from main(), or the link:api-run-command.html[run-command API]. -The link:api-string-list.html[string-list API] is similar, but cannot be +The string-list API (documented in string-list.h) is similar, but cannot be used for these purposes; instead of storing a straight string pointer, it contains an item structure with a `util` field that is not compatible with the traditional argv interface. diff --git a/Documentation/technical/hash-function-transition.txt b/Documentation/technical/hash-function-transition.txt new file mode 100644 index 0000000000..417ba491d0 --- /dev/null +++ b/Documentation/technical/hash-function-transition.txt @@ -0,0 +1,797 @@ +Git hash function transition +============================ + +Objective +--------- +Migrate Git from SHA-1 to a stronger hash function. + +Background +---------- +At its core, the Git version control system is a content addressable +filesystem. It uses the SHA-1 hash function to name content. For +example, files, directories, and revisions are referred to by hash +values unlike in other traditional version control systems where files +or versions are referred to via sequential numbers. 
The use of a hash +function to address its content delivers a few advantages: + +* Integrity checking is easy. Bit flips, for example, are easily + detected, as the hash of corrupted content does not match its name. +* Lookup of objects is fast. + +Using a cryptographically secure hash function brings additional +advantages: + +* Object names can be signed and third parties can trust the hash to + address the signed object and all objects it references. +* Communication using Git protocol and out of band communication + methods have a short reliable string that can be used to reliably + address stored content. + +Over time some flaws in SHA-1 have been discovered by security +researchers. https://shattered.io demonstrated a practical SHA-1 hash +collision. As a result, SHA-1 cannot be considered cryptographically +secure any more. This impacts the communication of hash values because +we cannot trust that a given hash value represents the known good +version of content that the speaker intended. + +SHA-1 still possesses the other properties such as fast object lookup +and safe error checking, but other hash functions are equally suitable +that are believed to be cryptographically secure. + +Goals +----- +Where NewHash is a strong 256-bit hash function to replace SHA-1 (see +"Selection of a New Hash", below): + +1. The transition to NewHash can be done one local repository at a time. + a. Requiring no action by any other party. + b. A NewHash repository can communicate with SHA-1 Git servers + (push/fetch). + c. Users can use SHA-1 and NewHash identifiers for objects + interchangeably (see "Object names on the command line", below). + d. New signed objects make use of a stronger hash function than + SHA-1 for their security guarantees. +2. Allow a complete transition away from SHA-1. + a. Local metadata for SHA-1 compatibility can be removed from a + repository if compatibility with SHA-1 is no longer needed. +3. Maintainability throughout the process. + a. 
The object format is kept simple and consistent. + b. Creation of a generalized repository conversion tool. + +Non-Goals +--------- +1. Add NewHash support to Git protocol. This is valuable and the + logical next step but it is out of scope for this initial design. +2. Transparently improving the security of existing SHA-1 signed + objects. +3. Intermixing objects using multiple hash functions in a single + repository. +4. Taking the opportunity to fix other bugs in Git's formats and + protocols. +5. Shallow clones and fetches into a NewHash repository. (This will + change when we add NewHash support to Git protocol.) +6. Skip fetching some submodules of a project into a NewHash + repository. (This also depends on NewHash support in Git + protocol.) + +Overview +-------- +We introduce a new repository format extension. Repositories with this +extension enabled use NewHash instead of SHA-1 to name their objects. +This affects both object names and object content --- both the names +of objects and all references to other objects within an object are +switched to the new hash function. + +NewHash repositories cannot be read by older versions of Git. + +Alongside the packfile, a NewHash repository stores a bidirectional +mapping between NewHash and SHA-1 object names. The mapping is generated +locally and can be verified using "git fsck". Object lookups use this +mapping to allow naming objects using either their SHA-1 or NewHash names +interchangeably. + +"git cat-file" and "git hash-object" gain options to display an object +in its sha1 form and write an object given its sha1 form. This +requires all objects referenced by that object to be present in the +object database so that they can be named using the appropriate name +(using the bidirectional hash mapping). + +Fetches from a SHA-1 based server convert the fetched objects into +NewHash form and record the mapping in the bidirectional mapping table +(see below for details). 
Pushes to a SHA-1 based server convert the +objects being pushed into sha1 form so the server does not have to be +aware of the hash function the client is using. + +Detailed Design +--------------- +Repository format extension +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A NewHash repository uses repository format version `1` (see +Documentation/technical/repository-version.txt) with extensions +`objectFormat` and `compatObjectFormat`: + + [core] + repositoryFormatVersion = 1 + [extensions] + objectFormat = newhash + compatObjectFormat = sha1 + +Specifying a repository format extension ensures that versions of Git +not aware of NewHash do not try to operate on these repositories, +instead producing an error message: + + $ git status + fatal: unknown repository extensions found: + objectformat + compatobjectformat + +See the "Transition plan" section below for more details on these +repository extensions. + +Object names +~~~~~~~~~~~~ +Objects can be named by their 40 hexadecimal digit sha1-name or 64 +hexadecimal digit newhash-name, plus names derived from those (see +gitrevisions(7)). + +The sha1-name of an object is the SHA-1 of the concatenation of its +type, length, a nul byte, and the object's sha1-content. This is the +traditional <sha1> used in Git to name objects. + +The newhash-name of an object is the NewHash of the concatenation of its +type, length, a nul byte, and the object's newhash-content. + +Object format +~~~~~~~~~~~~~ +The content as a byte sequence of a tag, commit, or tree object named +by sha1 and newhash differ because an object named by newhash-name refers to +other objects by their newhash-names and an object named by sha1-name +refers to other objects by their sha1-names. + +The newhash-content of an object is the same as its sha1-content, except +that objects referenced by the object are named using their newhash-names +instead of sha1-names. Because a blob object does not refer to any +other object, its sha1-content and newhash-content are the same. 
+ +The format allows round-trip conversion between newhash-content and +sha1-content. + +Object storage +~~~~~~~~~~~~~~ +Loose objects use zlib compression and packed objects use the packed +format described in Documentation/technical/pack-format.txt, just like +today. The content that is compressed and stored uses newhash-content +instead of sha1-content. + +Pack index +~~~~~~~~~~ +Pack index (.idx) files use a new v3 format that supports multiple +hash functions. They have the following format (all integers are in +network byte order): + +- A header appears at the beginning and consists of the following: + - The 4-byte pack index signature: '\377t0c' + - 4-byte version number: 3 + - 4-byte length of the header section, including the signature and + version number + - 4-byte number of objects contained in the pack + - 4-byte number of object formats in this pack index: 2 + - For each object format: + - 4-byte format identifier (e.g., 'sha1' for SHA-1) + - 4-byte length in bytes of shortened object names. This is the + shortest possible length needed to make names in the shortened + object name table unambiguous. + - 4-byte integer, recording where tables relating to this format + are stored in this index file, as an offset from the beginning. + - 4-byte offset to the trailer from the beginning of this file. + - Zero or more additional key/value pairs (4-byte key, 4-byte + value). Only one key is supported: 'PSRC'. See the "Loose objects + and unreachable objects" section for supported values and how this + is used. All other keys are reserved. Readers must ignore + unrecognized keys. +- Zero or more NUL bytes. This can optionally be used to improve the + alignment of the full object name table below. +- Tables for the first object format: + - A sorted table of shortened object names. 
These are prefixes of + the names of all objects in this pack file, packed together + without offset values to reduce the cache footprint of the binary + search for a specific object name. + + - A table of full object names in pack order. This allows resolving + a reference to "the nth object in the pack file" (from a + reachability bitmap or from the next table of another object + format) to its object name. + + - A table of 4-byte values mapping object name order to pack order. + For an object in the table of sorted shortened object names, the + value at the corresponding index in this table is the index in the + previous table for that same object. + + This can be used to look up the object in reachability bitmaps or + to look up its name in another object format. + + - A table of 4-byte CRC32 values of the packed object data, in the + order that the objects appear in the pack file. This is to allow + compressed data to be copied directly from pack to pack during + repacking without undetected data corruption. + + - A table of 4-byte offset values. For an object in the table of + sorted shortened object names, the value at the corresponding + index in this table indicates where that object can be found in + the pack file. These are usually 31-bit pack file offsets, but + large offsets are encoded as an index into the next table with the + most significant bit set. + + - A table of 8-byte offset entries (empty for pack files less than + 2 GiB). Pack files are organized with heavily used objects toward + the front, so most object references should not need to refer to + this table. +- Zero or more NUL bytes. +- Tables for the second object format, with the same layout as above, + up to and not including the table of CRC32 values. +- Zero or more NUL bytes. +- The trailer consists of the following: + - A copy of the 20-byte NewHash checksum at the end of the + corresponding packfile. + + - 20-byte NewHash checksum of all of the above. 
+ +Loose object index +~~~~~~~~~~~~~~~~~~ +A new file $GIT_OBJECT_DIR/loose-object-idx contains information about +all loose objects. Its format is + + # loose-object-idx + (newhash-name SP sha1-name LF)* + +where the object names are in hexadecimal format. The file is not +sorted. + +The loose object index is protected against concurrent writes by a +lock file $GIT_OBJECT_DIR/loose-object-idx.lock. To add a new loose +object: + +1. Write the loose object to a temporary file, like today. +2. Open loose-object-idx.lock with O_CREAT | O_EXCL to acquire the lock. +3. Rename the loose object into place. +4. Open loose-object-idx with O_APPEND and write the new object +5. Unlink loose-object-idx.lock to release the lock. + +To remove entries (e.g. in "git pack-refs" or "git-prune"): + +1. Open loose-object-idx.lock with O_CREAT | O_EXCL to acquire the + lock. +2. Write the new content to loose-object-idx.lock. +3. Unlink any loose objects being removed. +4. Rename to replace loose-object-idx, releasing the lock. + +Translation table +~~~~~~~~~~~~~~~~~ +The index files support a bidirectional mapping between sha1-names +and newhash-names. The lookup proceeds similarly to ordinary object +lookups. For example, to convert a sha1-name to a newhash-name: + + 1. Look for the object in idx files. If a match is present in the + idx's sorted list of truncated sha1-names, then: + a. Read the corresponding entry in the sha1-name order to pack + name order mapping. + b. Read the corresponding entry in the full sha1-name table to + verify we found the right object. If it is, then + c. Read the corresponding entry in the full newhash-name table. + That is the object's newhash-name. + 2. Check for a loose object. Read lines from loose-object-idx until + we find a match. + +Step (1) takes the same amount of time as an ordinary object lookup: +O(number of packs * log(objects per pack)). Step (2) takes O(number of +loose objects) time. 
To maintain good performance it will be necessary +to keep the number of loose objects low. See the "Loose objects and +unreachable objects" section below for more details. + +Since all operations that make new objects (e.g., "git commit") add +the new objects to the corresponding index, this mapping is possible +for all objects in the object store. + +Reading an object's sha1-content +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The sha1-content of an object can be read by converting all newhash-names +its newhash-content references to sha1-names using the translation table. + +Fetch +~~~~~ +Fetching from a SHA-1 based server requires translating between SHA-1 +and NewHash based representations on the fly. + +SHA-1s named in the ref advertisement that are present on the client +can be translated to NewHash and looked up as local objects using the +translation table. + +Negotiation proceeds as today. Any "have"s generated locally are +converted to SHA-1 before being sent to the server, and SHA-1s +mentioned by the server are converted to NewHash when looking them up +locally. + +After negotiation, the server sends a packfile containing the +requested objects. We convert the packfile to NewHash format using +the following steps: + +1. index-pack: inflate each object in the packfile and compute its + SHA-1. Objects can contain deltas in OBJ_REF_DELTA format against + objects the client has locally. These objects can be looked up + using the translation table and their sha1-content read as + described above to resolve the deltas. +2. topological sort: starting at the "want"s from the negotiation + phase, walk through objects in the pack and emit a list of them, + excluding blobs, in reverse topologically sorted order, with each + object coming later in the list than all objects it references. + (This list only contains objects reachable from the "wants". If the + pack from the server contained additional extraneous objects, then + they will be discarded.) +3. 
convert to newhash: open a new (newhash) packfile. Read the topologically + sorted list just generated. For each object, inflate its + sha1-content, convert to newhash-content, and write it to the newhash + pack. Record the new sha1<->newhash mapping entry for use in the idx. +4. sort: reorder entries in the new pack to match the order of objects + in the pack the server generated and include blobs. Write a newhash idx + file. +5. clean up: remove the SHA-1 based pack file, index, and + topologically sorted list obtained from the server in steps 1 + and 2. + +Step 3 requires every object referenced by the new object to be in the +translation table. This is why the topological sort step is necessary. + +As an optimization, step 1 could write a file describing what non-blob +objects each object it has inflated from the packfile references. This +makes the topological sort in step 2 possible without inflating the +objects in the packfile for a second time. The objects need to be +inflated again in step 3, for a total of two inflations. + +Step 4 is probably necessary for good read-time performance. "git +pack-objects" on the server optimizes the pack file for good data +locality (see Documentation/technical/pack-heuristics.txt). + +Details of this process are likely to change. It will take some +experimenting to get this to perform well. + +Push +~~~~ +Push is simpler than fetch because the objects referenced by the +pushed objects are already in the translation table. The sha1-content +of each object being pushed can be read as described in the "Reading +an object's sha1-content" section to generate the pack written by git +send-pack. + +Signed Commits +~~~~~~~~~~~~~~ +We add a new field "gpgsig-newhash" to the commit object format to allow +signing commits without relying on SHA-1. It is similar to the +existing "gpgsig" field. Its signed payload is the newhash-content of the +commit object with any "gpgsig" and "gpgsig-newhash" fields removed. 
+ +This means commits can be signed +1. using SHA-1 only, as in existing signed commit objects +2. using both SHA-1 and NewHash, by using both gpgsig-newhash and gpgsig + fields. +3. using only NewHash, by only using the gpgsig-newhash field. + +Old versions of "git verify-commit" can verify the gpgsig signature in +cases (1) and (2) without modifications and view case (3) as an +ordinary unsigned commit. + +Signed Tags +~~~~~~~~~~~ +We add a new field "gpgsig-newhash" to the tag object format to allow +signing tags without relying on SHA-1. Its signed payload is the +newhash-content of the tag with its gpgsig-newhash field and "-----BEGIN PGP +SIGNATURE-----" delimited in-body signature removed. + +This means tags can be signed +1. using SHA-1 only, as in existing signed tag objects +2. using both SHA-1 and NewHash, by using gpgsig-newhash and an in-body + signature. +3. using only NewHash, by only using the gpgsig-newhash field. + +Mergetag embedding +~~~~~~~~~~~~~~~~~~ +The mergetag field in the sha1-content of a commit contains the +sha1-content of a tag that was merged by that commit. + +The mergetag field in the newhash-content of the same commit contains the +newhash-content of the same tag. + +Submodules +~~~~~~~~~~ +To convert recorded submodule pointers, you need to have the converted +submodule repository in place. The translation table of the submodule +can be used to look up the new hash. + +Loose objects and unreachable objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Fast lookups in the loose-object-idx require that the number of loose +objects not grow too high. + +"git gc --auto" currently waits for there to be 6700 loose objects +present before consolidating them into a packfile. We will need to +measure to find a more appropriate threshold for it to use. + +"git gc --auto" currently waits for there to be 50 packs present +before combining packfiles. Packing loose objects more aggressively +may cause the number of pack files to grow too quickly. 
This can be +mitigated by using a strategy similar to Martin Fick's exponential +rolling garbage collection script: +https://gerrit-review.googlesource.com/c/gerrit/+/35215 + +"git gc" currently expels any unreachable objects it encounters in +pack files to loose objects in an attempt to prevent a race when +pruning them (in case another process is simultaneously writing a new +object that refers to the about-to-be-deleted object). This leads to +an explosion in the number of loose objects present and disk space +usage due to the objects in delta form being replaced with independent +loose objects. Worse, the race is still present for loose objects. + +Instead, "git gc" will need to move unreachable objects to a new +packfile marked as UNREACHABLE_GARBAGE (using the PSRC field; see +below). To avoid the race when writing new objects referring to an +about-to-be-deleted object, code paths that write new objects will +need to copy any objects from UNREACHABLE_GARBAGE packs that they +refer to into new, non-UNREACHABLE_GARBAGE packs (or loose objects). +UNREACHABLE_GARBAGE packs are then safe to delete if their creation time (as +indicated by the file's mtime) is long enough ago. + +To avoid a proliferation of UNREACHABLE_GARBAGE packs, they can be +combined under certain circumstances. If "gc.garbageTtl" is set to +greater than one day, then packs created within a single calendar day, +UTC, can be coalesced together. The resulting packfile would have an +mtime before midnight on that day, so this makes the effective maximum +ttl the garbageTtl + 1 day. If "gc.garbageTtl" is less than one day, +then we divide the calendar day into intervals one-third of that ttl +in duration. Packs created within the same interval can be coalesced +together. The resulting packfile would have an mtime before the end of +the interval, so this makes the effective maximum ttl equal to the +garbageTtl * 4/3. 
+ +This rule comes from Thirumala Reddy Mutchukota's JGit change +https://git.eclipse.org/r/90465. + +The UNREACHABLE_GARBAGE setting goes in the PSRC field of the pack +index. More generally, that field indicates where a pack came from: + + - 1 (PACK_SOURCE_RECEIVE) for a pack received over the network + - 2 (PACK_SOURCE_AUTO) for a pack created by a lightweight + "gc --auto" operation + - 3 (PACK_SOURCE_GC) for a pack created by a full gc + - 4 (PACK_SOURCE_UNREACHABLE_GARBAGE) for potential garbage + discovered by gc + - 5 (PACK_SOURCE_INSERT) for locally created objects that were + written directly to a pack file, e.g. from "git add ." + +This information can be useful for debugging and for "gc --auto" to +make appropriate choices about which packs to coalesce. + +Caveats +------- +Invalid objects +~~~~~~~~~~~~~~~ +The conversion from sha1-content to newhash-content retains any +brokenness in the original object (e.g., tree entry modes encoded with +leading 0, tree objects whose paths are not sorted correctly, and +commit objects without an author or committer). This is a deliberate +feature of the design to allow the conversion to round-trip. + +More profoundly broken objects (e.g., a commit with a truncated "tree" +header line) cannot be converted but were not usable by current Git +anyway. + +Shallow clone and submodules +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Because it requires all referenced objects to be available in the +locally generated translation table, this design does not support +shallow clone or unfetched submodules. Protocol improvements might +allow lifting this restriction. + +Alternates +~~~~~~~~~~ +For the same reason, a newhash repository cannot borrow objects from a +sha1 repository using objects/info/alternates or +$GIT_ALTERNATE_OBJECT_REPOSITORIES. + +git notes +~~~~~~~~~ +The "git notes" tool annotates objects using their sha1-name as key. +This design does not describe a way to migrate notes trees to use +newhash-names. 
That migration is expected to happen separately (for +example using a file at the root of the notes tree to describe which +hash it uses). + +Server-side cost +~~~~~~~~~~~~~~~~ +Until Git protocol gains NewHash support, using NewHash based storage +on public-facing Git servers is strongly discouraged. Once Git +protocol gains NewHash support, NewHash based servers are likely not +to support SHA-1 compatibility, to avoid what may be a very expensive +hash reencode during clone and to encourage peers to modernize. + +The design described here allows fetches by SHA-1 clients of a +personal NewHash repository because it's not much more difficult than +allowing pushes from that repository. This support needs to be guarded +by a configuration option --- servers like git.kernel.org that serve a +large number of clients would not be expected to bear that cost. + +Meaning of signatures +~~~~~~~~~~~~~~~~~~~~~ +The signed payload for signed commits and tags does not explicitly +name the hash used to identify objects. If some day Git adopts a new +hash function with the same length as the current SHA-1 (40 +hexadecimal digit) or NewHash (64 hexadecimal digit) objects then the +intent behind the PGP signed payload in an object signature is +unclear: + + object e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7 + type commit + tag v2.12.0 + tagger Junio C Hamano <gitster@pobox.com> 1487962205 -0800 + + Git 2.12 + +Does this mean Git v2.12.0 is the commit with sha1-name +e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7 or the commit with +new-40-digit-hash-name e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7? + +Fortunately NewHash and SHA-1 have different lengths. If Git starts +using another hash with the same length to name objects, then it will +need to change the format of signed payloads using that hash to +address this issue. 
+ +Object names on the command line +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +To support the transition (see Transition plan below), this design +supports four different modes of operation: + + 1. ("dark launch") Treat object names input by the user as SHA-1 and + convert any object names written to output to SHA-1, but store + objects using NewHash. This allows users to test the code with no + visible behavior change except for performance. This also + allows running even tests that assume the SHA-1 hash function, to + sanity-check the behavior of the new mode. + + 2. ("early transition") Allow both SHA-1 and NewHash object names in + input. Any object names written to output use SHA-1. This allows + users to continue to make use of SHA-1 to communicate with peers + (e.g. by email) that have not migrated yet and prepares for mode 3. + + 3. ("late transition") Allow both SHA-1 and NewHash object names in + input. Any object names written to output use NewHash. In this + mode, users are using a more secure object naming method by + default. The disruption is minimal as long as most of their peers + are in mode 2 or mode 3. + + 4. ("post-transition") Treat object names input by the user as + NewHash and write output using NewHash. This is safer than mode 3 + because there is less risk that input is incorrectly interpreted + using the wrong hash function. + +The mode is specified in configuration. + +The user can also explicitly specify which format to use for a +particular revision specifier and for output, overriding the mode. For +example: + +git --output-format=sha1 log abac87a^{sha1}..f787cac^{newhash} + +Selection of a New Hash +----------------------- +In early 2005, around the time that Git was written, Xiaoyun Wang, +Yiqun Lisa Yin, and Hongbo Yu announced an attack finding SHA-1 +collisions in 2^69 operations. In August they published details. +Luckily, no practical demonstrations of a collision in full SHA-1 were +published until 10 years later, in 2017. 
+ +The hash function NewHash to replace SHA-1 should be stronger than +SHA-1 was: we would like it to be trustworthy and useful in practice +for at least 10 years. + +Some other relevant properties: + +1. A 256-bit hash (long enough to match common security practice; not + excessively long to hurt performance and disk usage). + +2. High quality implementations should be widely available (e.g. in + OpenSSL). + +3. The hash function's properties should match Git's needs (e.g. Git + requires collision and 2nd preimage resistance and does not require + length extension resistance). + +4. As a tiebreaker, the hash should be fast to compute (fortunately + many contenders are faster than SHA-1). + +Some hashes under consideration are SHA-256, SHA-512/256, SHA-256x16, +K12, and BLAKE2bp-256. + +Transition plan +--------------- +Some initial steps can be implemented independently of one another: +- adding a hash function API (vtable) +- teaching fsck to tolerate the gpgsig-newhash field +- excluding gpgsig-* from the fields copied by "git commit --amend" +- annotating tests that depend on SHA-1 values with a SHA1 test + prerequisite +- using "struct object_id", GIT_MAX_RAWSZ, and GIT_MAX_HEXSZ + consistently instead of "unsigned char *" and the hardcoded + constants 20 and 40. +- introducing index v3 +- adding support for the PSRC field and safer object pruning + + +The first user-visible change is the introduction of the objectFormat +extension (without compatObjectFormat). 
This requires: +- implementing the loose-object-idx +- teaching fsck about this mode of operation +- using the hash function API (vtable) when computing object names +- signing objects and verifying signatures +- rejecting attempts to fetch from or push to an incompatible + repository + +Next comes introduction of compatObjectFormat: +- translating object names between object formats +- translating object content between object formats +- generating and verifying signatures in the compat format +- adding appropriate index entries when adding a new object to the + object store +- --output-format option +- ^{sha1} and ^{newhash} revision notation +- configuration to specify default input and output format (see + "Object names on the command line" above) + +The next step is supporting fetches and pushes to SHA-1 repositories: +- allow pushes to a repository using the compat format +- generate a topologically sorted list of the SHA-1 names of fetched + objects +- convert the fetched packfile to newhash format and generate an idx + file +- re-sort to match the order of objects in the fetched packfile + +The infrastructure supporting fetch also allows converting an existing +repository. In converted repositories and new clones, end users can +gain support for the new hash function without any visible change in +behavior (see "dark launch" in the "Object names on the command line" +section). In particular this allows users to verify NewHash signatures +on objects in the repository, and it should ensure the transition code +is stable in production in preparation for using it more widely. + +Over time projects would encourage their users to adopt the "early +transition" and then "late transition" modes to take advantage of the +new, more futureproof NewHash object names. + +When objectFormat and compatObjectFormat are both set, commands +generating signatures would generate both SHA-1 and NewHash signatures +by default to support both new and old users. 
+ +In projects using NewHash heavily, users could be encouraged to adopt +the "post-transition" mode to avoid accidentally making implicit use +of SHA-1 object names. + +Once a critical mass of users have upgraded to a version of Git that +can verify NewHash signatures and have converted their existing +repositories to support verifying them, we can add support for a +setting to generate only NewHash signatures. This is expected to be at +least a year later. + +That is also a good moment to advertise the ability to convert +repositories to use NewHash only, stripping out all SHA-1 related +metadata. This improves performance by eliminating translation +overhead and security by avoiding the possibility of accidentally +relying on the safety of SHA-1. + +Updating Git's protocols to allow a server to specify which hash +functions it supports is also an important part of this transition. It +is not discussed in detail in this document but this transition plan +assumes it happens. :) + +Alternatives considered +----------------------- +Upgrading everyone working on a particular project on a flag day +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Projects like the Linux kernel are large and complex enough that +flipping the switch for all projects based on the repository at once +is infeasible. + +Not only would all developers and server operators supporting +developers have to switch on the same flag day, but supporting tooling +(continuous integration, code review, bug trackers, etc) would have to +be adapted as well. This also makes it difficult to get early feedback +from some project participants testing before it is time for mass +adoption. + +Using hash functions in parallel +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +(e.g. https://public-inbox.org/git/22708.8913.864049.452252@chiark.greenend.org.uk/ ) +Objects newly created would be addressed by the new hash, but inside +such an object (e.g. 
commit) it is still possible to address objects +using the old hash function. +* You cannot trust its history (needed for bisectability) in the + future without further work +* Maintenance burden as the number of supported hash functions grows + (they will never go away, so they accumulate). In this proposal, by + comparison, converted objects lose all references to SHA-1. + +Signed objects with multiple hashes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Instead of introducing the gpgsig-newhash field in commit and tag objects +for newhash-content based signatures, an earlier version of this design +added "hash newhash <newhash-name>" fields to strengthen the existing +sha1-content based signatures. + +In other words, a single signature was used to attest to the object +content using both hash functions. This had some advantages: +* Using one signature instead of two speeds up the signing process. +* Having one signed payload with both hashes allows the signer to + attest to the sha1-name and newhash-name referring to the same object. +* All users consume the same signature. Broken signatures are likely + to be detected quickly using current versions of git. + +However, it also came with disadvantages: +* Verifying a signed object requires access to the sha1-names of all + objects it references, even after the transition is complete and + the translation table is no longer needed for anything else. To support + this, the design added fields such as "hash sha1 tree <sha1-name>" + and "hash sha1 parent <sha1-name>" to the newhash-content of a signed + commit, complicating the conversion process. +* Allowing signed objects without a sha1 (for after the transition is + complete) complicated the design further, requiring a "nohash sha1" + field to suppress including "hash sha1" fields in the newhash-content + and signed payload. 
+ +Lazily populated translation table +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Some of the work of building the translation table could be deferred to +push time, but that would significantly complicate and slow down pushes. +Calculating the sha1-name at object creation time at the same time it is +being streamed to disk and having its newhash-name calculated should be +an acceptable cost. + +Document History +---------------- + +2017-03-03 +bmwill@google.com, jonathantanmy@google.com, jrnieder@gmail.com, +sbeller@google.com + +Initial version sent to +http://public-inbox.org/git/20170304011251.GA26789@aiede.mtv.corp.google.com + +2017-03-03 jrnieder@gmail.com +Incorporated suggestions from jonathantanmy and sbeller: +* describe purpose of signed objects with each hash type +* redefine signed object verification using object content under the + first hash function + +2017-03-06 jrnieder@gmail.com +* Use SHA3-256 instead of SHA2 (thanks, Linus and brian m. carlson).[1][2] +* Make sha3-based signatures a separate field, avoiding the need for + "hash" and "nohash" fields (thanks to peff[3]). +* Add a sorting phase to fetch (thanks to Junio for noticing the need + for this). +* Omit blobs from the topological sort during fetch (thanks to peff). +* Discuss alternates, git notes, and git servers in the caveats + section (thanks to Junio Hamano, brian m. carlson[4], and Shawn + Pearce). +* Clarify language throughout (thanks to various commenters, + especially Junio). + +2017-09-27 jrnieder@gmail.com, sbeller@google.com +* use placeholder NewHash instead of SHA3-256 +* describe criteria for picking a hash function. 
+* include a transition plan (thanks especially to Brandon Williams + for fleshing these ideas out) +* define the translation table (thanks, Shawn Pearce[5], Jonathan + Tan, and Masaya Suzuki) +* avoid loose object overhead by packing more aggressively in + "git gc --auto" + +[1] http://public-inbox.org/git/CA+55aFzJtejiCjV0e43+9oR3QuJK2PiFiLQemytoLpyJWe6P9w@mail.gmail.com/ +[2] http://public-inbox.org/git/CA+55aFz+gkAsDZ24zmePQuEs1XPS9BP_s8O7Q4wQ7LV7X5-oDA@mail.gmail.com/ +[3] http://public-inbox.org/git/20170306084353.nrns455dvkdsfgo5@sigill.intra.peff.net/ +[4] http://public-inbox.org/git/20170304224936.rqqtkdvfjgyezsht@genre.crustytoothpaste.net +[5] https://public-inbox.org/git/CAJo=hJtoX9=AyLHHpUJS7fueV9ciZ_MNpnEPHUz8Whui6g9F0A@mail.gmail.com/ diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index ab04c977be..0e88e23653 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.15.0-rc0 +DEF_VER=v2.15.0-rc1 LF=' ' @@ -824,6 +824,7 @@ LIB_OBJS += notes-cache.o LIB_OBJS += notes-merge.o LIB_OBJS += notes-utils.o LIB_OBJS += object.o +LIB_OBJS += oidmap.o LIB_OBJS += oidset.o LIB_OBJS += packfile.o LIB_OBJS += pack-bitmap.o diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f2be145e12..8ec459f522 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -633,7 +633,7 @@ static int find_ofs_delta(const off_t offset, enum object_type type) int first = 0, last = nr_ofs_deltas; while (first < last) { - int next = (first + last) / 2; + int next = first + (last - first) / 2; struct ofs_delta_entry *delta = &ofs_deltas[next]; int cmp; @@ -687,7 +687,7 @@ static int find_ref_delta(const unsigned char *sha1, enum object_type type) int first = 0, last = nr_ref_deltas; while (first < last) { - int next = (first + last) / 2; + int next = first + (last - first) / 2; struct ref_delta_entry *delta = &ref_deltas[next]; int cmp; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 
5ee2c48ffb..6e77dfd444 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1277,7 +1277,7 @@ static int done_pbase_path_pos(unsigned hash) int lo = 0; int hi = done_pbase_paths_num; while (lo < hi) { - int mi = (hi + lo) / 2; + int mi = lo + (hi - lo) / 2; if (done_pbase_paths[mi] == hash) return mi; if (done_pbase_paths[mi] < hash) diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 689a29fac1..62ea264c46 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -394,7 +394,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, lo = 0; hi = nr; while (lo < hi) { - mid = (lo + hi)/2; + mid = lo + (hi - lo) / 2; if (base_offset < obj_list[mid].offset) { hi = mid; } else if (base_offset > obj_list[mid].offset) { diff --git a/cache-tree.c b/cache-tree.c index 71d092ed51..d3f7401278 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -49,7 +49,7 @@ static int subtree_pos(struct cache_tree *it, const char *path, int pathlen) lo = 0; hi = it->subtree_nr; while (lo < hi) { - int mi = (lo + hi) / 2; + int mi = lo + (hi - lo) / 2; struct cache_tree_sub *mdl = down[mi]; int cmp = subtree_name_cmp(path, pathlen, mdl->name, mdl->namelen); diff --git a/compat/regex/regex_internal.c b/compat/regex/regex_internal.c index d4121f2f4f..98342b8316 100644 --- a/compat/regex/regex_internal.c +++ b/compat/regex/regex_internal.c @@ -613,7 +613,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) int low = 0, high = pstr->valid_len, mid; do { - mid = (high + low) / 2; + mid = low + (high - low) / 2; if (pstr->offsets[mid] > offset) high = mid; else if (pstr->offsets[mid] < offset) @@ -1394,7 +1394,7 @@ re_node_set_contains (const re_node_set *set, int elem) right = set->nelem - 1; while (idx < right) { - mid = (idx + right) / 2; + mid = idx + (right - idx) / 2; if (set->elems[mid] < elem) idx = mid + 1; else diff --git a/compat/regex/regexec.c b/compat/regex/regexec.c index 0a745d9c3b..6f2b48a78b 
100644 --- a/compat/regex/regexec.c +++ b/compat/regex/regexec.c @@ -4284,7 +4284,7 @@ search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) last = right = mctx->nbkref_ents; for (left = 0; left < right;) { - mid = (left + right) / 2; + mid = left + (right - left) / 2; if (mctx->bkref_ents[mid].str_idx < str_idx) left = mid + 1; else diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index d934417475..0e16f017a4 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -1385,7 +1385,7 @@ _git_describe () __gitcomp " --all --tags --contains --abbrev= --candidates= --exact-match --debug --long --match --always --first-parent - --exclude + --exclude --dirty --broken " return esac @@ -253,6 +253,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile) { unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT; + struct delayed_checkout *dco = state->delayed_checkout; int fd, ret, fstat_done = 0; char *new; struct strbuf buf = STRBUF_INIT; @@ -273,55 +274,65 @@ static int write_entry(struct cache_entry *ce, } switch (ce_mode_s_ifmt) { - case S_IFREG: case S_IFLNK: new = read_blob_entry(ce, &size); if (!new) return error("unable to read sha1 file of %s (%s)", - path, oid_to_hex(&ce->oid)); + path, oid_to_hex(&ce->oid)); - if (ce_mode_s_ifmt == S_IFLNK && has_symlinks && !to_tempfile) { - ret = symlink(new, path); - free(new); - if (ret) - return error_errno("unable to create symlink %s", - path); - break; + /* + * We can't make a real symlink; write out a regular file entry + * with the symlink destination as its contents. + */ + if (!has_symlinks || to_tempfile) + goto write_file_entry; + + ret = symlink(new, path); + free(new); + if (ret) + return error_errno("unable to create symlink %s", path); + break; + + case S_IFREG: + /* + * We do not send the blob in case of a retry, so do not + * bother reading it at all. 
+ */ + if (dco && dco->state == CE_RETRY) { + new = NULL; + size = 0; + } else { + new = read_blob_entry(ce, &size); + if (!new) + return error("unable to read sha1 file of %s (%s)", + path, oid_to_hex(&ce->oid)); } /* * Convert from git internal format to working tree format */ - if (ce_mode_s_ifmt == S_IFREG) { - struct delayed_checkout *dco = state->delayed_checkout; - if (dco && dco->state != CE_NO_DELAY) { - /* Do not send the blob in case of a retry. */ - if (dco->state == CE_RETRY) { - new = NULL; - size = 0; - } - ret = async_convert_to_working_tree( - ce->name, new, size, &buf, dco); - if (ret && string_list_has_string(&dco->paths, ce->name)) { - free(new); - goto finish; - } - } else - ret = convert_to_working_tree( - ce->name, new, size, &buf); - - if (ret) { + if (dco && dco->state != CE_NO_DELAY) { + ret = async_convert_to_working_tree(ce->name, new, + size, &buf, dco); + if (ret && string_list_has_string(&dco->paths, ce->name)) { free(new); - new = strbuf_detach(&buf, &newsize); - size = newsize; + goto delayed; } - /* - * No "else" here as errors from convert are OK at this - * point. If the error would have been fatal (e.g. - * filter is required), then we would have died already. - */ + } else + ret = convert_to_working_tree(ce->name, new, size, &buf); + + if (ret) { + free(new); + new = strbuf_detach(&buf, &newsize); + size = newsize; } + /* + * No "else" here as errors from convert are OK at this + * point. If the error would have been fatal (e.g. + * filter is required), then we would have died already. + */ + write_file_entry: fd = open_output_fd(path, ce, to_tempfile); if (fd < 0) { free(new); @@ -336,6 +347,7 @@ static int write_entry(struct cache_entry *ce, if (wrote < 0) return error("unable to write file %s", path); break; + case S_IFGITLINK: if (to_tempfile) return error("cannot create temporary submodule %s", path); @@ -347,6 +359,7 @@ static int write_entry(struct cache_entry *ce, NULL, oid_to_hex(&ce->oid), state->force ? 
SUBMODULE_MOVE_HEAD_FORCE : 0); break; + default: return error("unknown file mode for %s in index", path); } @@ -355,11 +368,14 @@ finish: if (state->refresh_cache) { assert(state->istate); if (!fstat_done) - lstat(ce->name, &st); + if (lstat(ce->name, &st) < 0) + return error_errno("unable to stat just-written file %s", + ce->name); fill_stat_cache_info(ce, &st); ce->ce_flags |= CE_UPDATE_IN_BASE; state->istate->cache_changed |= CE_ENTRY_CHANGED; } +delayed: return 0; } diff --git a/fetch-pack.c b/fetch-pack.c index 105506e9aa..008b25d3db 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -611,7 +611,7 @@ static int tip_oids_contain(struct oidset *tip_oids, * add to "newlist" between calls, the additions will always be for * oids that are already in the set. */ - if (!tip_oids->map.tablesize) { + if (!tip_oids->map.map.tablesize) { add_refs_to_oidset(tip_oids, unmatched); add_refs_to_oidset(tip_oids, newlist); } @@ -358,15 +358,15 @@ static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *op continue; if (S_ISDIR(entry.mode)) { - obj = &lookup_tree(entry.oid)->object; - if (name) + obj = (struct object *)lookup_tree(entry.oid); + if (name && obj) put_object_name(options, obj, "%s%s/", name, entry.path); result = options->walk(obj, OBJ_TREE, data, options); } else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) { - obj = &lookup_blob(entry.oid)->object; - if (name) + obj = (struct object *)lookup_blob(entry.oid); + if (name && obj) put_object_name(options, obj, "%s%s", name, entry.path); result = options->walk(obj, OBJ_BLOB, data, options); diff --git a/oidmap.c b/oidmap.c new file mode 100644 index 0000000000..6db4fffcdb --- /dev/null +++ b/oidmap.c @@ -0,0 +1,51 @@ +#include "cache.h" +#include "oidmap.h" + +static int cmpfn(const void *hashmap_cmp_fn_data, + const void *entry, const void *entry_or_key, + const void *keydata) +{ + const struct oidmap_entry *entry_ = entry; + if (keydata) + return oidcmp(&entry_->oid, (const struct object_id *) 
keydata); + return oidcmp(&entry_->oid, + &((const struct oidmap_entry *) entry_or_key)->oid); +} + +static int hash(const struct object_id *oid) +{ + int hash; + memcpy(&hash, oid->hash, sizeof(hash)); + return hash; +} + +void oidmap_init(struct oidmap *map, size_t initial_size) +{ + hashmap_init(&map->map, cmpfn, NULL, initial_size); +} + +void oidmap_free(struct oidmap *map, int free_entries) +{ + if (!map) + return; + hashmap_free(&map->map, free_entries); +} + +void *oidmap_get(const struct oidmap *map, const struct object_id *key) +{ + return hashmap_get_from_hash(&map->map, hash(key), key); +} + +void *oidmap_remove(struct oidmap *map, const struct object_id *key) +{ + struct hashmap_entry entry; + hashmap_entry_init(&entry, hash(key)); + return hashmap_remove(&map->map, &entry, key); +} + +void *oidmap_put(struct oidmap *map, void *entry) +{ + struct oidmap_entry *to_put = entry; + hashmap_entry_init(&to_put->internal_entry, hash(&to_put->oid)); + return hashmap_put(&map->map, to_put); +} diff --git a/oidmap.h b/oidmap.h new file mode 100644 index 0000000000..18f54cde14 --- /dev/null +++ b/oidmap.h @@ -0,0 +1,68 @@ +#ifndef OIDMAP_H +#define OIDMAP_H + +#include "hashmap.h" + +/* + * struct oidmap_entry is a structure representing an entry in the hash table, + * which must be used as first member of user data structures. + * + * Users should set the oid field. oidmap_put() will populate the + * internal_entry field. + */ +struct oidmap_entry { + /* For internal use only */ + struct hashmap_entry internal_entry; + + struct object_id oid; +}; + +struct oidmap { + struct hashmap map; +}; + +#define OIDMAP_INIT { { NULL } } + +/* + * Initializes an oidmap structure. + * + * `map` is the oidmap to initialize. + * + * If the total number of entries is known in advance, the `initial_size` + * parameter may be used to preallocate a sufficiently large table and thus + * prevent expensive resizing. If 0, the table is dynamically resized. 
+ */ +extern void oidmap_init(struct oidmap *map, size_t initial_size); + +/* + * Frees an oidmap structure and allocated memory. + * + * If `free_entries` is true, each oidmap_entry in the map is freed as well + * using stdlibs free(). + */ +extern void oidmap_free(struct oidmap *map, int free_entries); + +/* + * Returns the oidmap entry for the specified oid, or NULL if not found. + */ +extern void *oidmap_get(const struct oidmap *map, + const struct object_id *key); + +/* + * Adds or replaces an oidmap entry. + * + * ((struct oidmap_entry *) entry)->internal_entry will be populated by this + * function. + * + * Returns the replaced entry, or NULL if not found (i.e. the entry was added). + */ +extern void *oidmap_put(struct oidmap *map, void *entry); + +/* + * Removes an oidmap entry matching the specified oid. + * + * Returns the removed entry, or NULL if not found. + */ +extern void *oidmap_remove(struct oidmap *map, const struct object_id *key); + +#endif @@ -1,50 +1,30 @@ #include "cache.h" #include "oidset.h" -struct oidset_entry { - struct hashmap_entry hash; - struct object_id oid; -}; - -static int oidset_hashcmp(const void *unused_cmp_data, - const void *va, const void *vb, - const void *vkey) -{ - const struct oidset_entry *a = va, *b = vb; - const struct object_id *key = vkey; - return oidcmp(&a->oid, key ? 
key : &b->oid); -} - int oidset_contains(const struct oidset *set, const struct object_id *oid) { - struct hashmap_entry key; - - if (!set->map.cmpfn) + if (!set->map.map.tablesize) return 0; - - hashmap_entry_init(&key, sha1hash(oid->hash)); - return !!hashmap_get(&set->map, &key, oid); + return !!oidmap_get(&set->map, oid); } int oidset_insert(struct oidset *set, const struct object_id *oid) { - struct oidset_entry *entry; - - if (!set->map.cmpfn) - hashmap_init(&set->map, oidset_hashcmp, NULL, 0); + struct oidmap_entry *entry; - if (oidset_contains(set, oid)) + if (!set->map.map.tablesize) + oidmap_init(&set->map, 0); + else if (oidset_contains(set, oid)) return 1; entry = xmalloc(sizeof(*entry)); - hashmap_entry_init(&entry->hash, sha1hash(oid->hash)); oidcpy(&entry->oid, oid); - hashmap_add(&set->map, entry); + oidmap_put(&set->map, entry); return 0; } void oidset_clear(struct oidset *set) { - hashmap_free(&set->map, 1); + oidmap_free(&set->map, 1); } @@ -1,6 +1,8 @@ #ifndef OIDSET_H #define OIDSET_H +#include "oidmap.h" + /** * This API is similar to sha1-array, in that it maintains a set of object ids * in a memory-efficient way. The major differences are: @@ -17,10 +19,10 @@ * A single oidset; should be zero-initialized (or use OIDSET_INIT). */ struct oidset { - struct hashmap map; + struct oidmap map; }; -#define OIDSET_INIT { { NULL } } +#define OIDSET_INIT { OIDMAP_INIT } /** * Returns true iff `set` contains `oid`. 
diff --git a/packfile.c b/packfile.c index eab7542487..4a5fe7ab18 100644 --- a/packfile.c +++ b/packfile.c @@ -1743,7 +1743,7 @@ off_t find_pack_entry_one(const unsigned char *sha1, sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects); while (lo < hi) { - unsigned mi = (lo + hi) / 2; + unsigned mi = lo + (hi - lo) / 2; int cmp = hashcmp(index + mi * stride, sha1); if (debug_lookup) diff --git a/ref-filter.c b/ref-filter.c index 45a3be8340..e728b15b3a 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -82,6 +82,7 @@ static struct used_atom { } remote_ref; struct { enum { C_BARE, C_BODY, C_BODY_DEP, C_LINES, C_SIG, C_SUB, C_TRAILERS } option; + struct process_trailer_options trailer_opts; unsigned int nlines; } contents; struct { @@ -182,9 +183,23 @@ static void subject_atom_parser(const struct ref_format *format, struct used_ato static void trailers_atom_parser(const struct ref_format *format, struct used_atom *atom, const char *arg) { - if (arg) - die(_("%%(trailers) does not take arguments")); + struct string_list params = STRING_LIST_INIT_DUP; + int i; + + if (arg) { + string_list_split(&params, arg, ',', -1); + for (i = 0; i < params.nr; i++) { + const char *s = params.items[i].string; + if (!strcmp(s, "unfold")) + atom->u.contents.trailer_opts.unfold = 1; + else if (!strcmp(s, "only")) + atom->u.contents.trailer_opts.only_trailers = 1; + else + die(_("unknown %%(trailers) argument: %s"), s); + } + } atom->u.contents.option = C_TRAILERS; + string_list_clear(&params, 0); } static void contents_atom_parser(const struct ref_format *format, struct used_atom *atom, const char *arg) @@ -197,9 +212,10 @@ static void contents_atom_parser(const struct ref_format *format, struct used_at atom->u.contents.option = C_SIG; else if (!strcmp(arg, "subject")) atom->u.contents.option = C_SUB; - else if (!strcmp(arg, "trailers")) - atom->u.contents.option = C_TRAILERS; - else if (skip_prefix(arg, "lines=", &arg)) { + else if (skip_prefix(arg, "trailers", &arg)) { + skip_prefix(arg, ":", &arg); 
+ trailers_atom_parser(format, atom, *arg ? arg : NULL); + } else if (skip_prefix(arg, "lines=", &arg)) { atom->u.contents.option = C_LINES; if (strtoul_ui(arg, 10, &atom->u.contents.nlines)) die(_("positive value expected contents:lines=%s"), arg); @@ -1048,7 +1064,7 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct obj name++; if (strcmp(name, "subject") && strcmp(name, "body") && - strcmp(name, "trailers") && + !starts_with(name, "trailers") && !starts_with(name, "contents")) continue; if (!subpos) @@ -1073,13 +1089,12 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct obj append_lines(&s, subpos, contents_end - subpos, atom->u.contents.nlines); v->s = strbuf_detach(&s, NULL); } else if (atom->u.contents.option == C_TRAILERS) { - struct trailer_info info; + struct strbuf s = STRBUF_INIT; + + /* Format the trailer info according to the trailer_opts given */ + format_trailers_from_commit(&s, subpos, &atom->u.contents.trailer_opts); - /* Search for trailer info */ - trailer_info_get(&info, subpos); - v->s = xmemdupz(info.trailer_start, - info.trailer_end - info.trailer_start); - trailer_info_release(&info); + v->s = strbuf_detach(&s, NULL); } else if (atom->u.contents.option == C_BARE) v->s = xstrdup(subpos); } @@ -1435,8 +1435,21 @@ const char *refs_resolve_ref_unsafe(struct ref_store *refs, if (refs_read_raw_ref(refs, refname, sha1, &sb_refname, &read_flags)) { *flags |= read_flags; - if (errno != ENOENT || (resolve_flags & RESOLVE_REF_READING)) + + /* In reading mode, refs must eventually resolve */ + if (resolve_flags & RESOLVE_REF_READING) + return NULL; + + /* + * Otherwise a missing ref is OK. But the files backend + * may show errors besides ENOENT if there are + * similarly-named refs. 
+ */ + if (errno != ENOENT && + errno != EISDIR && + errno != ENOTDIR) return NULL; + hashclr(sha1); if (*flags & REF_BAD_NAME) *flags |= REF_ISBROKEN; diff --git a/sequencer.c b/sequencer.c index 7886e2269e..e258bb6469 100644 --- a/sequencer.c +++ b/sequencer.c @@ -2558,7 +2558,7 @@ static enum check_level get_missing_commit_check_level(void) return CHECK_WARN; if (!strcasecmp("error", value)) return CHECK_ERROR; - warning(_("unrecognized setting %s for option" + warning(_("unrecognized setting %s for option " "rebase.missingCommitsCheck. Ignoring."), value); return CHECK_IGNORE; } diff --git a/sha1-lookup.c b/sha1-lookup.c index 2552b7902c..4cf3ebd921 100644 --- a/sha1-lookup.c +++ b/sha1-lookup.c @@ -10,7 +10,7 @@ static uint32_t take2(const unsigned char *sha1) * Conventional binary search loop looks like this: * * do { - * int mi = (lo + hi) / 2; + * int mi = lo + (hi - lo) / 2; * int cmp = "entry pointed at by mi" minus "target"; * if (!cmp) * return (mi is the wanted one) @@ -95,7 +95,7 @@ int sha1_pos(const unsigned char *sha1, void *table, size_t nr, hi = mi; else lo = mi + 1; - mi = (hi + lo) / 2; + mi = lo + (hi - lo) / 2; } while (lo < hi); return -lo-1; } diff --git a/sha1_file.c b/sha1_file.c index 09ad64ce55..10c3a0083d 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1124,10 +1124,14 @@ static int sha1_loose_object_info(const unsigned char *sha1, } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0) status = error("unable to parse %s header", sha1_to_hex(sha1)); - if (status >= 0 && oi->contentp) + if (status >= 0 && oi->contentp) { *oi->contentp = unpack_sha1_rest(&stream, hdr, *oi->sizep, sha1); - else + if (!*oi->contentp) { + git_inflate_end(&stream); + status = -1; + } + } else git_inflate_end(&stream); munmap(map, mapsize); diff --git a/sha1_name.c b/sha1_name.c index 134ac9742f..c7c5ab376c 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -157,7 +157,7 @@ static void unique_in_pack(struct packed_git *p, num = p->num_objects; last = 
num; while (first < last) { - uint32_t mid = (first + last) / 2; + uint32_t mid = first + (last - first) / 2; const unsigned char *current; int cmp; diff --git a/string-list.c b/string-list.c index 806b4c8723..a0cf0cfe88 100644 --- a/string-list.c +++ b/string-list.c @@ -16,7 +16,7 @@ static int get_entry_index(const struct string_list *list, const char *string, compare_strings_fn cmp = list->cmp ? list->cmp : strcmp; while (left + 1 < right) { - int middle = (left + right) / 2; + int middle = left + (right - left) / 2; int compare = cmp(string, list->items[middle].string); if (compare < 0) right = middle; diff --git a/t/t1004-read-tree-m-u-wf.sh b/t/t1004-read-tree-m-u-wf.sh index c70cf42300..c7ce5d8bb5 100755 --- a/t/t1004-read-tree-m-u-wf.sh +++ b/t/t1004-read-tree-m-u-wf.sh @@ -218,7 +218,7 @@ test_expect_success 'D/F' ' echo "100644 $a 2 subdir/file2" echo "100644 $b 3 subdir/file2/another" ) >expect && - test_cmp actual expect + test_cmp expect actual ' diff --git a/t/t1401-symbolic-ref.sh b/t/t1401-symbolic-ref.sh index eec3e90f9c..9e782a8122 100755 --- a/t/t1401-symbolic-ref.sh +++ b/t/t1401-symbolic-ref.sh @@ -129,11 +129,35 @@ test_expect_success 'symbolic-ref does not create ref d/f conflicts' ' test_must_fail git symbolic-ref refs/heads/df/conflict refs/heads/df ' -test_expect_success 'symbolic-ref handles existing pointer to invalid name' ' +test_expect_success 'symbolic-ref can overwrite pointer to invalid name' ' + test_when_finished reset_to_sane && head=$(git rev-parse HEAD) && git symbolic-ref HEAD refs/heads/outer && + test_when_finished "git update-ref -d refs/heads/outer/inner" && git update-ref refs/heads/outer/inner $head && git symbolic-ref HEAD refs/heads/unrelated ' +test_expect_success 'symbolic-ref can resolve d/f name (EISDIR)' ' + test_when_finished reset_to_sane && + head=$(git rev-parse HEAD) && + git symbolic-ref HEAD refs/heads/outer/inner && + test_when_finished "git update-ref -d refs/heads/outer" && + git update-ref 
refs/heads/outer $head && + echo refs/heads/outer/inner >expect && + git symbolic-ref HEAD >actual && + test_cmp expect actual +' + +test_expect_success 'symbolic-ref can resolve d/f name (ENOTDIR)' ' + test_when_finished reset_to_sane && + head=$(git rev-parse HEAD) && + git symbolic-ref HEAD refs/heads/outer && + test_when_finished "git update-ref -d refs/heads/outer/inner" && + git update-ref refs/heads/outer/inner $head && + echo refs/heads/outer >expect && + git symbolic-ref HEAD >actual && + test_cmp expect actual +' + test_done diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 4087150db1..cb4b66e29d 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -222,6 +222,28 @@ test_expect_success 'unparseable tree object' ' test_i18ngrep ! "fatal: empty filename in tree entry" out ' +hex2oct() { + perl -ne 'printf "\\%03o", hex for /../g' +} + +test_expect_success 'tree entry with type mismatch' ' + test_when_finished "remove_object \$blob" && + test_when_finished "remove_object \$tree" && + test_when_finished "remove_object \$commit" && + test_when_finished "git update-ref -d refs/heads/type_mismatch" && + blob=$(echo blob | git hash-object -w --stdin) && + blob_bin=$(echo $blob | hex2oct) && + tree=$( + printf "40000 dir\0${blob_bin}100644 file\0${blob_bin}" | + git hash-object -t tree --stdin -w --literally + ) && + commit=$(git commit-tree $tree) && + git update-ref refs/heads/type_mismatch $commit && + test_must_fail git fsck >out 2>&1 && + test_i18ngrep "is a blob, not a tree" out && + test_i18ngrep ! 
"dangling blob" out +' + test_expect_success 'tag pointing to nonexistent' ' cat >invalid-tag <<-\EOF && object ffffffffffffffffffffffffffffffffffffffff diff --git a/t/t3200-branch.sh b/t/t3200-branch.sh index 3ac7ebf85f..503a88d029 100755 --- a/t/t3200-branch.sh +++ b/t/t3200-branch.sh @@ -117,6 +117,16 @@ test_expect_success 'git branch -m bbb should rename checked out branch' ' test_cmp expect actual ' +test_expect_success 'renaming checked out branch works with d/f conflict' ' + test_when_finished "git branch -D foo/bar || git branch -D foo" && + test_when_finished git checkout master && + git checkout -b foo && + git branch -m foo/bar && + git symbolic-ref HEAD >actual && + echo refs/heads/foo/bar >expect && + test_cmp expect actual +' + test_expect_success 'git branch -m o/o o should fail when o/p exists' ' git branch o/o && git branch o/p && diff --git a/t/t3308-notes-merge.sh b/t/t3308-notes-merge.sh index 19aed7ec95..ab946a5153 100755 --- a/t/t3308-notes-merge.sh +++ b/t/t3308-notes-merge.sh @@ -79,7 +79,7 @@ test_expect_success 'fail to merge empty notes ref into empty notes ref (z => y) test_expect_success 'fail to merge into various non-notes refs' ' test_must_fail git -c "core.notesRef=refs/notes" notes merge x && test_must_fail git -c "core.notesRef=refs/notes/" notes merge x && - mkdir -p .git/refs/notes/dir && + git update-ref refs/notes/dir/foo HEAD && test_must_fail git -c "core.notesRef=refs/notes/dir" notes merge x && test_must_fail git -c "core.notesRef=refs/notes/dir/" notes merge x && test_must_fail git -c "core.notesRef=refs/heads/master" notes merge x && diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 3bca958863..bd0f75d9f7 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -155,7 +155,7 @@ test_expect_success 'ignore-blank-lines: only new lines' ' " >x && git diff --ignore-blank-lines >out && >expect && - test_cmp out expect + test_cmp expect out ' test_expect_success 'ignore-blank-lines: 
only new lines with space' ' @@ -165,7 +165,7 @@ test_expect_success 'ignore-blank-lines: only new lines with space' ' " >x && git diff -w --ignore-blank-lines >out && >expect && - test_cmp out expect + test_cmp expect out ' test_expect_success 'ignore-blank-lines: after change' ' diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index 977472f539..591f35daaf 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -544,7 +544,7 @@ Signed-off-by: A U Thor EOF unfold () { - perl -0pe 's/\n\s+/ /' + perl -0pe 's/\n\s+/ /g' } test_expect_success 'set up trailer tests' ' diff --git a/t/t6007-rev-list-cherry-pick-file.sh b/t/t6007-rev-list-cherry-pick-file.sh index 2959745196..f0268372d2 100755 --- a/t/t6007-rev-list-cherry-pick-file.sh +++ b/t/t6007-rev-list-cherry-pick-file.sh @@ -57,7 +57,7 @@ test_expect_success '--left-right' ' git rev-list --left-right B...C > actual && git name-rev --stdin --name-only --refs="*tags/*" \ < actual > actual.named && - test_cmp actual.named expect + test_cmp expect actual.named ' test_expect_success '--count' ' @@ -77,14 +77,14 @@ test_expect_success '--cherry-pick bar does not come up empty' ' git rev-list --left-right --cherry-pick B...C -- bar > actual && git name-rev --stdin --name-only --refs="*tags/*" \ < actual > actual.named && - test_cmp actual.named expect + test_cmp expect actual.named ' test_expect_success 'bar does not come up empty' ' git rev-list --left-right B...C -- bar > actual && git name-rev --stdin --name-only --refs="*tags/*" \ < actual > actual.named && - test_cmp actual.named expect + test_cmp expect actual.named ' cat >expect <<EOF @@ -96,14 +96,14 @@ test_expect_success '--cherry-pick bar does not come up empty (II)' ' git rev-list --left-right --cherry-pick F...E -- bar > actual && git name-rev --stdin --name-only --refs="*tags/*" \ < actual > actual.named && - test_cmp actual.named expect + test_cmp expect actual.named ' test_expect_success 'name-rev 
multiple --refs combine inclusive' ' git rev-list --left-right --cherry-pick F...E -- bar >actual && git name-rev --stdin --name-only --refs="*tags/F" --refs="*tags/E" \ <actual >actual.named && - test_cmp actual.named expect + test_cmp expect actual.named ' cat >expect <<EOF @@ -115,7 +115,7 @@ test_expect_success 'name-rev --refs excludes non-matched patterns' ' git rev-list --left-right --cherry-pick F...E -- bar >actual && git name-rev --stdin --name-only --refs="*tags/F" \ <actual >actual.named && - test_cmp actual.named expect + test_cmp expect actual.named ' cat >expect <<EOF @@ -127,14 +127,14 @@ test_expect_success 'name-rev --exclude excludes matched patterns' ' git rev-list --left-right --cherry-pick F...E -- bar >actual && git name-rev --stdin --name-only --refs="*tags/*" --exclude="*E" \ <actual >actual.named && - test_cmp actual.named expect + test_cmp expect actual.named ' test_expect_success 'name-rev --no-refs clears the refs list' ' git rev-list --left-right --cherry-pick F...E -- bar >expect && git name-rev --stdin --name-only --refs="*tags/F" --refs="*tags/E" --no-refs --refs="*tags/G" \ <expect >actual && - test_cmp actual expect + test_cmp expect actual ' cat >expect <<EOF @@ -148,7 +148,7 @@ test_expect_success '--cherry-mark' ' git rev-list --cherry-mark F...E -- bar > actual && git name-rev --stdin --name-only --refs="*tags/*" \ < actual > actual.named && - test_cmp actual.named expect + test_cmp expect actual.named ' cat >expect <<EOF @@ -162,7 +162,7 @@ test_expect_success '--cherry-mark --left-right' ' git rev-list --cherry-mark --left-right F...E -- bar > actual && git name-rev --stdin --name-only --refs="*tags/*" \ < actual > actual.named && - test_cmp actual.named expect + test_cmp expect actual.named ' cat >expect <<EOF @@ -173,14 +173,14 @@ test_expect_success '--cherry-pick --right-only' ' git rev-list --cherry-pick --right-only F...E -- bar > actual && git name-rev --stdin --name-only --refs="*tags/*" \ < actual > actual.named && 
- test_cmp actual.named expect + test_cmp expect actual.named ' test_expect_success '--cherry-pick --left-only' ' git rev-list --cherry-pick --left-only E...F -- bar > actual && git name-rev --stdin --name-only --refs="*tags/*" \ < actual > actual.named && - test_cmp actual.named expect + test_cmp expect actual.named ' cat >expect <<EOF @@ -192,7 +192,7 @@ test_expect_success '--cherry' ' git rev-list --cherry F...E -- bar > actual && git name-rev --stdin --name-only --refs="*tags/*" \ < actual > actual.named && - test_cmp actual.named expect + test_cmp expect actual.named ' cat >expect <<EOF @@ -201,7 +201,7 @@ EOF test_expect_success '--cherry --count' ' git rev-list --cherry --count F...E -- bar > actual && - test_cmp actual expect + test_cmp expect actual ' cat >expect <<EOF @@ -210,7 +210,7 @@ EOF test_expect_success '--cherry-mark --count' ' git rev-list --cherry-mark --count F...E -- bar > actual && - test_cmp actual expect + test_cmp expect actual ' cat >expect <<EOF @@ -219,7 +219,7 @@ EOF test_expect_success '--cherry-mark --left-right --count' ' git rev-list --cherry-mark --left-right --count F...E -- bar > actual && - test_cmp actual expect + test_cmp expect actual ' test_expect_success '--cherry-pick with independent, but identical branches' ' diff --git a/t/t6013-rev-list-reverse-parents.sh b/t/t6013-rev-list-reverse-parents.sh index 59fc2f06e0..89458d370f 100755 --- a/t/t6013-rev-list-reverse-parents.sh +++ b/t/t6013-rev-list-reverse-parents.sh @@ -28,7 +28,7 @@ test_expect_success '--reverse --parents --full-history combines correctly' ' perl -e "print reverse <>" > expected && git rev-list --reverse --parents --full-history master -- foo \ > actual && - test_cmp actual expected + test_cmp expected actual ' test_expect_success '--boundary does too' ' @@ -36,7 +36,7 @@ test_expect_success '--boundary does too' ' perl -e "print reverse <>" > expected && git rev-list --boundary --reverse --parents --full-history \ master ^root -- foo > actual && - 
test_cmp actual expected + test_cmp expected actual ' test_done diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh index a6f51a5003..416ff7d0b8 100755 --- a/t/t6300-for-each-ref.sh +++ b/t/t6300-for-each-ref.sh @@ -605,18 +605,104 @@ test_expect_success 'do not dereference NULL upon %(HEAD) on unborn branch' ' cat >trailers <<EOF Reviewed-by: A U Thor <author@example.com> Signed-off-by: A U Thor <author@example.com> +[ v2 updated patch description ] +Acked-by: A U Thor + <author@example.com> EOF -test_expect_success 'basic atom: head contents:trailers' ' +unfold () { + perl -0pe 's/\n\s+/ /g' +} + +test_expect_success 'set up trailers for next test' ' echo "Some contents" > two && git add two && - git commit -F - <<-EOF && + git commit -F - <<-EOF trailers: this commit message has trailers Some message contents $(cat trailers) EOF +' + +test_expect_success '%(trailers:unfold) unfolds trailers' ' + git for-each-ref --format="%(trailers:unfold)" refs/heads/master >actual && + { + unfold <trailers + echo + } >expect && + test_cmp expect actual +' + +test_expect_success '%(trailers:only) shows only "key: value" trailers' ' + git for-each-ref --format="%(trailers:only)" refs/heads/master >actual && + { + grep -v patch.description <trailers && + echo + } >expect && + test_cmp expect actual +' + +test_expect_success '%(trailers:only) and %(trailers:unfold) work together' ' + git for-each-ref --format="%(trailers:only,unfold)" refs/heads/master >actual && + git for-each-ref --format="%(trailers:unfold,only)" refs/heads/master >reverse && + test_cmp actual reverse && + { + grep -v patch.description <trailers | unfold && + echo + } >expect && + test_cmp expect actual +' + +test_expect_success '%(contents:trailers:unfold) unfolds trailers' ' + git for-each-ref --format="%(contents:trailers:unfold)" refs/heads/master >actual && + { + unfold <trailers + echo + } >expect && + test_cmp expect actual +' + +test_expect_success '%(contents:trailers:only) shows only "key: 
value" trailers' ' + git for-each-ref --format="%(contents:trailers:only)" refs/heads/master >actual && + { + grep -v patch.description <trailers && + echo + } >expect && + test_cmp expect actual +' + +test_expect_success '%(contents:trailers:only) and %(contents:trailers:unfold) work together' ' + git for-each-ref --format="%(contents:trailers:only,unfold)" refs/heads/master >actual && + git for-each-ref --format="%(contents:trailers:unfold,only)" refs/heads/master >reverse && + test_cmp actual reverse && + { + grep -v patch.description <trailers | unfold && + echo + } >expect && + test_cmp expect actual +' + +test_expect_success '%(trailers) rejects unknown trailers arguments' ' + # error message cannot be checked under i18n + cat >expect <<-EOF && + fatal: unknown %(trailers) argument: unsupported + EOF + test_must_fail git for-each-ref --format="%(trailers:unsupported)" 2>actual && + test_i18ncmp expect actual +' + +test_expect_success '%(contents:trailers) rejects unknown trailers arguments' ' + # error message cannot be checked under i18n + cat >expect <<-EOF && + fatal: unknown %(trailers) argument: unsupported + EOF + test_must_fail git for-each-ref --format="%(contents:trailers:unsupported)" 2>actual && + test_i18ncmp expect actual +' + +test_expect_success 'basic atom: head contents:trailers' ' git for-each-ref --format="%(contents:trailers)" refs/heads/master >actual && sanitize_pgp <actual >actual.clean && # git for-each-ref ends with a blank line diff --git a/t/t7001-mv.sh b/t/t7001-mv.sh index cbc5fb37fe..f5929c46f3 100755 --- a/t/t7001-mv.sh +++ b/t/t7001-mv.sh @@ -488,7 +488,7 @@ test_expect_success 'moving a submodule in nested directories' ' git config -f ../.gitmodules submodule.deep/directory/hierarchy/sub.path >../actual && echo "directory/hierarchy/sub" >../expect ) && - test_cmp actual expect + test_cmp expect actual ' test_expect_failure 'moving nested submodules' ' diff --git a/t/t7005-editor.sh b/t/t7005-editor.sh index 
1b530b5022..29e5043b94 100755 --- a/t/t7005-editor.sh +++ b/t/t7005-editor.sh @@ -38,7 +38,7 @@ test_expect_success setup ' test_commit "$msg" && echo "$msg" >expect && git show -s --format=%s > actual && - test_cmp actual expect + test_cmp expect actual ' @@ -85,7 +85,7 @@ do git --exec-path=. commit --amend && git show -s --pretty=oneline | sed -e "s/^[0-9a-f]* //" >actual && - test_cmp actual expect + test_cmp expect actual ' done @@ -107,7 +107,7 @@ do git --exec-path=. commit --amend && git show -s --pretty=oneline | sed -e "s/^[0-9a-f]* //" >actual && - test_cmp actual expect + test_cmp expect actual ' done diff --git a/t/t7102-reset.sh b/t/t7102-reset.sh index 86f23be34a..95653a08ca 100755 --- a/t/t7102-reset.sh +++ b/t/t7102-reset.sh @@ -428,9 +428,9 @@ test_expect_success 'test --mixed <paths>' ' git reset HEAD -- file1 file2 file3 && test_must_fail git diff --quiet && git diff > output && - test_cmp output expect && + test_cmp expect output && git diff --cached > output && - test_cmp output cached_expect + test_cmp cached_expect output ' test_expect_success 'test resetting the index at give paths' ' diff --git a/t/t7201-co.sh b/t/t7201-co.sh index d4b217b0ee..76c223c967 100755 --- a/t/t7201-co.sh +++ b/t/t7201-co.sh @@ -187,7 +187,7 @@ test_expect_success 'format of merge conflict from checkout -m' ' d >>>>>>> local EOF - test_cmp two expect + test_cmp expect two ' test_expect_success 'checkout --merge --conflict=diff3 <branch>' ' @@ -213,7 +213,7 @@ test_expect_success 'checkout --merge --conflict=diff3 <branch>' ' d >>>>>>> local EOF - test_cmp two expect + test_cmp expect two ' test_expect_success 'switch to another branch while carrying a deletion' ' diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index 6f8337ffb5..a39e69a3eb 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -1211,7 +1211,7 @@ test_expect_success 'clone --recurse-submodules with a pathspec works' ' git clone --recurse-submodules="sub0" 
multisuper multisuper_clone && git -C multisuper_clone submodule status |cut -c1,43- >actual && - test_cmp actual expected + test_cmp expected actual ' test_expect_success 'clone with multiple --recurse-submodules options' ' diff --git a/t/t7405-submodule-merge.sh b/t/t7405-submodule-merge.sh index 0d5b42a25b..7bfb2f498d 100755 --- a/t/t7405-submodule-merge.sh +++ b/t/t7405-submodule-merge.sh @@ -119,7 +119,7 @@ test_expect_success 'merge with one side as a fast-forward of the other' ' git ls-tree test-forward sub | cut -f1 | cut -f3 -d" " > actual && (cd sub && git rev-parse sub-d > ../expect) && - test_cmp actual expect) + test_cmp expect actual) ' test_expect_success 'merging should conflict for non fast-forward' ' diff --git a/t/t7506-status-submodule.sh b/t/t7506-status-submodule.sh index 055c90736e..9edf6572ed 100755 --- a/t/t7506-status-submodule.sh +++ b/t/t7506-status-submodule.sh @@ -306,7 +306,7 @@ test_expect_success 'diff with merge conflict in .gitmodules' ' cd super && git diff >../diff_actual 2>&1 ) && - test_cmp diff_actual diff_expect + test_cmp diff_expect diff_actual ' test_expect_success 'diff --submodule with merge conflict in .gitmodules' ' @@ -314,7 +314,7 @@ test_expect_success 'diff --submodule with merge conflict in .gitmodules' ' cd super && git diff --submodule >../diff_submodule_actual 2>&1 ) && - test_cmp diff_submodule_actual diff_submodule_expect + test_cmp diff_submodule_expect diff_submodule_actual ' # We'll setup different cases for further testing: diff --git a/t/t7600-merge.sh b/t/t7600-merge.sh index 80194b79f9..dfde6a675a 100755 --- a/t/t7600-merge.sh +++ b/t/t7600-merge.sh @@ -697,7 +697,7 @@ test_expect_success 'merge --no-ff --edit' ' git cat-file commit HEAD >raw && grep "work done on the side branch" raw && sed "1,/^$/d" >actual raw && - test_cmp actual expected + test_cmp expected actual ' test_expect_success GPG 'merge --ff-only tag' ' @@ -709,7 +709,7 @@ test_expect_success GPG 'merge --ff-only tag' ' git merge 
--ff-only signed && git rev-parse signed^0 >expect && git rev-parse HEAD >actual && - test_cmp actual expect + test_cmp expect actual ' test_expect_success GPG 'merge --no-edit tag should skip editor' ' @@ -721,7 +721,7 @@ test_expect_success GPG 'merge --no-edit tag should skip editor' ' EDITOR=false git merge --no-edit signed && git rev-parse signed^0 >expect && git rev-parse HEAD^2 >actual && - test_cmp actual expect + test_cmp expect actual ' test_expect_success 'set up mod-256 conflict scenario' ' diff --git a/t/t7610-mergetool.sh b/t/t7610-mergetool.sh index 381b7df452..1a430b9c40 100755 --- a/t/t7610-mergetool.sh +++ b/t/t7610-mergetool.sh @@ -621,7 +621,7 @@ test_expect_success 'file with no base' ' test_must_fail git merge master && git mergetool --no-prompt --tool mybase -- both && >expected && - test_cmp both expected + test_cmp expected both ' test_expect_success 'custom commands override built-ins' ' @@ -632,7 +632,7 @@ test_expect_success 'custom commands override built-ins' ' test_must_fail git merge master && git mergetool --no-prompt --tool defaults -- both && echo master both added >expected && - test_cmp both expected + test_cmp expected both ' test_expect_success 'filenames seen by tools start with ./' ' diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh index f30980895c..4d261c2a9c 100755 --- a/t/t9001-send-email.sh +++ b/t/t9001-send-email.sh @@ -1266,7 +1266,7 @@ test_expect_success $PREREQ 'asks about and fixes 8bit encodings' ' grep email-using-8bit stdout && grep "Which 8bit encoding" stdout && egrep "Content|MIME" msgtxt1 >actual && - test_cmp actual content-type-decl + test_cmp content-type-decl actual ' test_expect_success $PREREQ 'sendemail.8bitEncoding works' ' @@ -1277,7 +1277,7 @@ test_expect_success $PREREQ 'sendemail.8bitEncoding works' ' --smtp-server="$(pwd)/fake.sendmail" \ email-using-8bit >stdout && egrep "Content|MIME" msgtxt1 >actual && - test_cmp actual content-type-decl + test_cmp content-type-decl actual ' 
test_expect_success $PREREQ '--8bit-encoding overrides sendemail.8bitEncoding' ' @@ -1289,7 +1289,7 @@ test_expect_success $PREREQ '--8bit-encoding overrides sendemail.8bitEncoding' ' --8bit-encoding=UTF-8 \ email-using-8bit >stdout && egrep "Content|MIME" msgtxt1 >actual && - test_cmp actual content-type-decl + test_cmp content-type-decl actual ' test_expect_success $PREREQ 'setup expect' ' @@ -32,7 +32,7 @@ static int bisearch(ucs_char_t ucs, const struct interval *table, int max) if (ucs < table[0].first || ucs > table[max].last) return 0; while (max >= min) { - mid = (min + max) / 2; + mid = min + (max - min) / 2; if (ucs > table[mid].last) min = mid + 1; else if (ucs < table[mid].first) diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c index a613efc703..9f91702de7 100644 --- a/xdiff/xpatience.c +++ b/xdiff/xpatience.c @@ -166,7 +166,7 @@ static int binary_search(struct entry **sequence, int longest, int left = -1, right = longest; while (left + 1 < right) { - int middle = (left + right) / 2; + int middle = left + (right - left) / 2; /* by construction, no two entries can be equal */ if (sequence[middle]->line2 > entry->line2) right = middle; |
