packfile.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. package packfile
  2. import (
  3. "bytes"
  4. "io"
  5. "os"
  6. billy "gopkg.in/src-d/go-billy.v4"
  7. "gopkg.in/src-d/go-git.v4/plumbing"
  8. "gopkg.in/src-d/go-git.v4/plumbing/cache"
  9. "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
  10. "gopkg.in/src-d/go-git.v4/plumbing/storer"
  11. )
var (
	// ErrInvalidObject is returned by Decode when an invalid object is
	// found in the packfile. Within this file it is also returned, with the
	// offending type added as detail, when an object header carries a type
	// that is neither a regular object nor a delta.
	ErrInvalidObject = NewError("invalid git object")
	// ErrZLib is returned by Decode when there was an error unzipping
	// the packfile contents.
	ErrZLib = NewError("zlib reading error")
)
// smallObjectThreshold is the size limit, in bytes, up to which non-delta
// objects are read straight into memory.
//
// When reading small objects from packfile it is beneficial to do so at
// once to exploit the buffered I/O. In many cases the objects are so small
// that they were already loaded to memory when the object header was
// loaded from the packfile. Wrapping in FSObject would cause this buffered
// data to be thrown away and then re-read later, with the additional
// seeking causing reloads from disk. Objects no larger than this threshold
// are always read into memory and stored in cache instead of being
// wrapped in FSObject.
const smallObjectThreshold = 16 * 1024
// Packfile allows retrieving information from inside a packfile.
type Packfile struct {
	// Index is the idx file used to translate between hashes and offsets.
	idxfile.Index
	// fs, when non-nil, makes objectAtOffset hand out FSObjects for
	// deltas and for objects above smallObjectThreshold.
	fs billy.Filesystem
	// file is the packfile itself; the scanner reads from it and ID/Close
	// operate on it directly.
	file billy.File
	// s is the scanner positioned over file.
	s *Scanner
	// deltaBaseCache holds already decoded objects. It may be nil; use
	// cacheGet/cachePut, which tolerate that.
	deltaBaseCache cache.Object
	// offsetToType remembers the resolved type of objects by pack offset,
	// as recorded by objectAtOffset.
	offsetToType map[int64]plumbing.ObjectType
}
  38. // NewPackfileWithCache creates a new Packfile with the given object cache.
  39. // If the filesystem is provided, the packfile will return FSObjects, otherwise
  40. // it will return MemoryObjects.
  41. func NewPackfileWithCache(
  42. index idxfile.Index,
  43. fs billy.Filesystem,
  44. file billy.File,
  45. cache cache.Object,
  46. ) *Packfile {
  47. s := NewScanner(file)
  48. return &Packfile{
  49. index,
  50. fs,
  51. file,
  52. s,
  53. cache,
  54. make(map[int64]plumbing.ObjectType),
  55. }
  56. }
  57. // NewPackfile returns a packfile representation for the given packfile file
  58. // and packfile idx.
  59. // If the filesystem is provided, the packfile will return FSObjects, otherwise
  60. // it will return MemoryObjects.
  61. func NewPackfile(index idxfile.Index, fs billy.Filesystem, file billy.File) *Packfile {
  62. return NewPackfileWithCache(index, fs, file, cache.NewObjectLRUDefault())
  63. }
  64. // Get retrieves the encoded object in the packfile with the given hash.
  65. func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) {
  66. offset, err := p.FindOffset(h)
  67. if err != nil {
  68. return nil, err
  69. }
  70. return p.GetByOffset(offset)
  71. }
  72. // GetByOffset retrieves the encoded object from the packfile with the given
  73. // offset.
  74. func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) {
  75. hash, err := p.FindHash(o)
  76. if err == nil {
  77. if obj, ok := p.deltaBaseCache.Get(hash); ok {
  78. return obj, nil
  79. }
  80. }
  81. return p.objectAtOffset(o)
  82. }
  83. // GetSizeByOffset retrieves the size of the encoded object from the
  84. // packfile with the given offset.
  85. func (p *Packfile) GetSizeByOffset(o int64) (size int64, err error) {
  86. if _, err := p.s.SeekFromStart(o); err != nil {
  87. if err == io.EOF || isInvalid(err) {
  88. return 0, plumbing.ErrObjectNotFound
  89. }
  90. return 0, err
  91. }
  92. h, err := p.nextObjectHeader()
  93. if err != nil {
  94. return 0, err
  95. }
  96. return h.Length, nil
  97. }
  98. func (p *Packfile) objectHeaderAtOffset(offset int64) (*ObjectHeader, error) {
  99. h, err := p.s.SeekObjectHeader(offset)
  100. p.s.pendingObject = nil
  101. return h, err
  102. }
  103. func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) {
  104. h, err := p.s.NextObjectHeader()
  105. p.s.pendingObject = nil
  106. return h, err
  107. }
  108. func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) {
  109. switch h.Type {
  110. case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
  111. return h.Length, nil
  112. case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
  113. buf := bufPool.Get().(*bytes.Buffer)
  114. buf.Reset()
  115. defer bufPool.Put(buf)
  116. if _, _, err := p.s.NextObject(buf); err != nil {
  117. return 0, err
  118. }
  119. delta := buf.Bytes()
  120. _, delta = decodeLEB128(delta) // skip src size
  121. sz, _ := decodeLEB128(delta)
  122. return int64(sz), nil
  123. default:
  124. return 0, ErrInvalidObject.AddDetails("type %q", h.Type)
  125. }
  126. }
  127. func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err error) {
  128. switch h.Type {
  129. case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
  130. return h.Type, nil
  131. case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
  132. var offset int64
  133. if h.Type == plumbing.REFDeltaObject {
  134. offset, err = p.FindOffset(h.Reference)
  135. if err != nil {
  136. return
  137. }
  138. } else {
  139. offset = h.OffsetReference
  140. }
  141. if baseType, ok := p.offsetToType[offset]; ok {
  142. typ = baseType
  143. } else {
  144. h, err = p.objectHeaderAtOffset(offset)
  145. if err != nil {
  146. return
  147. }
  148. typ, err = p.getObjectType(h)
  149. if err != nil {
  150. return
  151. }
  152. }
  153. default:
  154. err = ErrInvalidObject.AddDetails("type %q", h.Type)
  155. }
  156. return
  157. }
  158. func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) {
  159. h, err := p.objectHeaderAtOffset(offset)
  160. if err != nil {
  161. if err == io.EOF || isInvalid(err) {
  162. return nil, plumbing.ErrObjectNotFound
  163. }
  164. return nil, err
  165. }
  166. // If we have no filesystem, we will return a MemoryObject instead
  167. // of an FSObject.
  168. if p.fs == nil {
  169. return p.getNextObject(h)
  170. }
  171. // If the object is not a delta and it's small enough then read it
  172. // completely into memory now since it is already read from disk
  173. // into buffer anyway.
  174. if h.Length <= smallObjectThreshold && h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject {
  175. return p.getNextObject(h)
  176. }
  177. hash, err := p.FindHash(h.Offset)
  178. if err != nil {
  179. return nil, err
  180. }
  181. size, err := p.getObjectSize(h)
  182. if err != nil {
  183. return nil, err
  184. }
  185. typ, err := p.getObjectType(h)
  186. if err != nil {
  187. return nil, err
  188. }
  189. p.offsetToType[h.Offset] = typ
  190. return NewFSObject(
  191. hash,
  192. typ,
  193. h.Offset,
  194. size,
  195. p.Index,
  196. p.fs,
  197. p.file.Name(),
  198. p.deltaBaseCache,
  199. ), nil
  200. }
  201. func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
  202. ref, err := p.FindHash(offset)
  203. if err == nil {
  204. obj, ok := p.cacheGet(ref)
  205. if ok {
  206. reader, err := obj.Reader()
  207. if err != nil {
  208. return nil, err
  209. }
  210. return reader, nil
  211. }
  212. }
  213. h, err := p.objectHeaderAtOffset(offset)
  214. if err != nil {
  215. return nil, err
  216. }
  217. obj, err := p.getNextObject(h)
  218. if err != nil {
  219. return nil, err
  220. }
  221. return obj.Reader()
  222. }
  223. func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
  224. var obj = new(plumbing.MemoryObject)
  225. obj.SetSize(h.Length)
  226. obj.SetType(h.Type)
  227. var err error
  228. switch h.Type {
  229. case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
  230. err = p.fillRegularObjectContent(obj)
  231. case plumbing.REFDeltaObject:
  232. err = p.fillREFDeltaObjectContent(obj, h.Reference)
  233. case plumbing.OFSDeltaObject:
  234. err = p.fillOFSDeltaObjectContent(obj, h.OffsetReference)
  235. default:
  236. err = ErrInvalidObject.AddDetails("type %q", h.Type)
  237. }
  238. if err != nil {
  239. return nil, err
  240. }
  241. return obj, nil
  242. }
  243. func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) error {
  244. w, err := obj.Writer()
  245. if err != nil {
  246. return err
  247. }
  248. _, _, err = p.s.NextObject(w)
  249. p.cachePut(obj)
  250. return err
  251. }
  252. func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) error {
  253. buf := bufPool.Get().(*bytes.Buffer)
  254. buf.Reset()
  255. _, _, err := p.s.NextObject(buf)
  256. if err != nil {
  257. return err
  258. }
  259. base, ok := p.cacheGet(ref)
  260. if !ok {
  261. base, err = p.Get(ref)
  262. if err != nil {
  263. return err
  264. }
  265. }
  266. obj.SetType(base.Type())
  267. err = ApplyDelta(obj, base, buf.Bytes())
  268. p.cachePut(obj)
  269. bufPool.Put(buf)
  270. return err
  271. }
  272. func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error {
  273. buf := bytes.NewBuffer(nil)
  274. _, _, err := p.s.NextObject(buf)
  275. if err != nil {
  276. return err
  277. }
  278. var base plumbing.EncodedObject
  279. var ok bool
  280. hash, err := p.FindHash(offset)
  281. if err == nil {
  282. base, ok = p.cacheGet(hash)
  283. }
  284. if !ok {
  285. base, err = p.GetByOffset(offset)
  286. if err != nil {
  287. return err
  288. }
  289. }
  290. obj.SetType(base.Type())
  291. err = ApplyDelta(obj, base, buf.Bytes())
  292. p.cachePut(obj)
  293. return err
  294. }
  295. func (p *Packfile) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) {
  296. if p.deltaBaseCache == nil {
  297. return nil, false
  298. }
  299. return p.deltaBaseCache.Get(h)
  300. }
  301. func (p *Packfile) cachePut(obj plumbing.EncodedObject) {
  302. if p.deltaBaseCache == nil {
  303. return
  304. }
  305. p.deltaBaseCache.Put(obj)
  306. }
  307. // GetAll returns an iterator with all encoded objects in the packfile.
  308. // The iterator returned is not thread-safe, it should be used in the same
  309. // thread as the Packfile instance.
  310. func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) {
  311. return p.GetByType(plumbing.AnyObject)
  312. }
  313. // GetByType returns all the objects of the given type.
  314. func (p *Packfile) GetByType(typ plumbing.ObjectType) (storer.EncodedObjectIter, error) {
  315. switch typ {
  316. case plumbing.AnyObject,
  317. plumbing.BlobObject,
  318. plumbing.TreeObject,
  319. plumbing.CommitObject,
  320. plumbing.TagObject:
  321. entries, err := p.EntriesByOffset()
  322. if err != nil {
  323. return nil, err
  324. }
  325. return &objectIter{
  326. // Easiest way to provide an object decoder is just to pass a Packfile
  327. // instance. To not mess with the seeks, it's a new instance with a
  328. // different scanner but the same cache and offset to hash map for
  329. // reusing as much cache as possible.
  330. p: p,
  331. iter: entries,
  332. typ: typ,
  333. }, nil
  334. default:
  335. return nil, plumbing.ErrInvalidType
  336. }
  337. }
  338. // ID returns the ID of the packfile, which is the checksum at the end of it.
  339. func (p *Packfile) ID() (plumbing.Hash, error) {
  340. prev, err := p.file.Seek(-20, io.SeekEnd)
  341. if err != nil {
  342. return plumbing.ZeroHash, err
  343. }
  344. var hash plumbing.Hash
  345. if _, err := io.ReadFull(p.file, hash[:]); err != nil {
  346. return plumbing.ZeroHash, err
  347. }
  348. if _, err := p.file.Seek(prev, io.SeekStart); err != nil {
  349. return plumbing.ZeroHash, err
  350. }
  351. return hash, nil
  352. }
  353. // Close the packfile and its resources.
  354. func (p *Packfile) Close() error {
  355. closer, ok := p.file.(io.Closer)
  356. if !ok {
  357. return nil
  358. }
  359. return closer.Close()
  360. }
// objectIter walks index entries and decodes the matching objects from a
// Packfile, optionally keeping only those of one type.
type objectIter struct {
	// p is the packfile objects are decoded from.
	p *Packfile
	// typ is the type to keep; plumbing.AnyObject keeps everything.
	typ plumbing.ObjectType
	// iter yields the index entries to visit.
	iter idxfile.EntryIter
}
  366. func (i *objectIter) Next() (plumbing.EncodedObject, error) {
  367. for {
  368. e, err := i.iter.Next()
  369. if err != nil {
  370. return nil, err
  371. }
  372. obj, err := i.p.GetByOffset(int64(e.Offset))
  373. if err != nil {
  374. return nil, err
  375. }
  376. if i.typ == plumbing.AnyObject || obj.Type() == i.typ {
  377. return obj, nil
  378. }
  379. }
  380. }
  381. func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error {
  382. for {
  383. o, err := i.Next()
  384. if err != nil {
  385. if err == io.EOF {
  386. return nil
  387. }
  388. return err
  389. }
  390. if err := f(o); err != nil {
  391. return err
  392. }
  393. }
  394. }
  395. func (i *objectIter) Close() {
  396. i.iter.Close()
  397. }
  398. // isInvalid checks whether an error is an os.PathError with an os.ErrInvalid
  399. // error inside. It also checks for the windows error, which is different from
  400. // os.ErrInvalid.
  401. func isInvalid(err error) bool {
  402. pe, ok := err.(*os.PathError)
  403. if !ok {
  404. return false
  405. }
  406. errstr := pe.Err.Error()
  407. return errstr == errInvalidUnix || errstr == errInvalidWindows
  408. }
  409. // errInvalidWindows is the Windows equivalent to os.ErrInvalid
  410. const errInvalidWindows = "The parameter is incorrect."
  411. var errInvalidUnix = os.ErrInvalid.Error()