parser.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. package packfile
  2. import (
  3. "bytes"
  4. "errors"
  5. "io"
  6. "gopkg.in/src-d/go-git.v4/plumbing"
  7. "gopkg.in/src-d/go-git.v4/plumbing/cache"
  8. "gopkg.in/src-d/go-git.v4/plumbing/storer"
  9. )
// Sentinel errors reported by the Parser.
var (
	// ErrReferenceDeltaNotFound is returned when the reference delta is not
	// found, i.e. the base object of a reference delta could not be obtained
	// while resolving a thin pack.
	ErrReferenceDeltaNotFound = errors.New("reference delta not found")

	// ErrNotSeekableSource is returned when the source for the parser is not
	// seekable and a storage was not provided, so it can't be parsed.
	ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided")

	// ErrDeltaNotCached is returned when the delta could not be found in cache.
	// The cache in question is the in-memory delta map kept for non-seekable
	// sources.
	ErrDeltaNotCached = errors.New("delta could not be found in cache")
)
// Observer interface is implemented by index encoders. The Parser invokes
// every registered Observer in registration order and stops at the first
// callback that returns an error.
type Observer interface {
	// OnHeader is called when a new packfile is opened. count is the number
	// of objects announced by the packfile header.
	OnHeader(count uint32) error
	// OnInflatedObjectHeader is called for each object header read. It
	// receives the object type, its size and its offset in the packfile.
	OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error
	// OnInflatedObjectContent is called for each decoded object. It receives
	// the object hash, its offset in the packfile, its CRC32 and its content.
	OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, content []byte) error
	// OnFooter is called when decoding is done. h is the packfile checksum.
	OnFooter(h plumbing.Hash) error
}
// Parser decodes a packfile and calls any observer associated to it. Is used
// to generate indexes.
type Parser struct {
	// storage is optional. When set, decoded objects are written to it and it
	// is consulted for objects that are not in the cache.
	storage storer.EncodedObjectStorer
	// scanner is the packfile source.
	scanner *Scanner
	// count is the number of objects reported by the packfile header.
	count uint32
	// oi holds one entry per object, in the order they appear in the packfile.
	oi []*objectInfo
	// oiByHash indexes non-delta objects (and placeholder parents of external
	// references) by hash.
	oiByHash map[plumbing.Hash]*objectInfo
	// oiByOffset indexes every object by its offset in the packfile.
	oiByOffset map[int64]*objectInfo
	// NOTE(review): hashOffset is not referenced by the parser code in this
	// file; confirm whether it is still needed.
	hashOffset map[plumbing.Hash]int64
	// checksum is the value returned by the scanner once all objects are read.
	checksum plumbing.Hash

	// cache keeps resolved content, keyed by offset, for objects that have
	// children.
	cache *cache.BufferLRU
	// delta content by offset, only used if source is not seekable
	deltas map[int64][]byte

	// ob is the list of observers notified while parsing.
	ob []Observer
}
  47. // NewParser creates a new Parser. The Scanner source must be seekable.
  48. // If it's not, NewParserWithStorage should be used instead.
  49. func NewParser(scanner *Scanner, ob ...Observer) (*Parser, error) {
  50. return NewParserWithStorage(scanner, nil, ob...)
  51. }
  52. // NewParserWithStorage creates a new Parser. The scanner source must either
  53. // be seekable or a storage must be provided.
  54. func NewParserWithStorage(
  55. scanner *Scanner,
  56. storage storer.EncodedObjectStorer,
  57. ob ...Observer,
  58. ) (*Parser, error) {
  59. if !scanner.IsSeekable && storage == nil {
  60. return nil, ErrNotSeekableSource
  61. }
  62. var deltas map[int64][]byte
  63. if !scanner.IsSeekable {
  64. deltas = make(map[int64][]byte)
  65. }
  66. return &Parser{
  67. storage: storage,
  68. scanner: scanner,
  69. ob: ob,
  70. count: 0,
  71. cache: cache.NewBufferLRUDefault(),
  72. deltas: deltas,
  73. }, nil
  74. }
  75. func (p *Parser) forEachObserver(f func(o Observer) error) error {
  76. for _, o := range p.ob {
  77. if err := f(o); err != nil {
  78. return err
  79. }
  80. }
  81. return nil
  82. }
  83. func (p *Parser) onHeader(count uint32) error {
  84. return p.forEachObserver(func(o Observer) error {
  85. return o.OnHeader(count)
  86. })
  87. }
  88. func (p *Parser) onInflatedObjectHeader(
  89. t plumbing.ObjectType,
  90. objSize int64,
  91. pos int64,
  92. ) error {
  93. return p.forEachObserver(func(o Observer) error {
  94. return o.OnInflatedObjectHeader(t, objSize, pos)
  95. })
  96. }
  97. func (p *Parser) onInflatedObjectContent(
  98. h plumbing.Hash,
  99. pos int64,
  100. crc uint32,
  101. content []byte,
  102. ) error {
  103. return p.forEachObserver(func(o Observer) error {
  104. return o.OnInflatedObjectContent(h, pos, crc, content)
  105. })
  106. }
  107. func (p *Parser) onFooter(h plumbing.Hash) error {
  108. return p.forEachObserver(func(o Observer) error {
  109. return o.OnFooter(h)
  110. })
  111. }
  112. // Parse start decoding phase of the packfile.
  113. func (p *Parser) Parse() (plumbing.Hash, error) {
  114. if err := p.init(); err != nil {
  115. return plumbing.ZeroHash, err
  116. }
  117. if err := p.indexObjects(); err != nil {
  118. return plumbing.ZeroHash, err
  119. }
  120. var err error
  121. p.checksum, err = p.scanner.Checksum()
  122. if err != nil && err != io.EOF {
  123. return plumbing.ZeroHash, err
  124. }
  125. if err := p.resolveDeltas(); err != nil {
  126. return plumbing.ZeroHash, err
  127. }
  128. if err := p.onFooter(p.checksum); err != nil {
  129. return plumbing.ZeroHash, err
  130. }
  131. return p.checksum, nil
  132. }
  133. func (p *Parser) init() error {
  134. _, c, err := p.scanner.Header()
  135. if err != nil {
  136. return err
  137. }
  138. if err := p.onHeader(c); err != nil {
  139. return err
  140. }
  141. p.count = c
  142. p.oiByHash = make(map[plumbing.Hash]*objectInfo, p.count)
  143. p.oiByOffset = make(map[int64]*objectInfo, p.count)
  144. p.oi = make([]*objectInfo, p.count)
  145. return nil
  146. }
  147. func (p *Parser) indexObjects() error {
  148. buf := new(bytes.Buffer)
  149. for i := uint32(0); i < p.count; i++ {
  150. buf.Reset()
  151. oh, err := p.scanner.NextObjectHeader()
  152. if err != nil {
  153. return err
  154. }
  155. delta := false
  156. var ota *objectInfo
  157. switch t := oh.Type; t {
  158. case plumbing.OFSDeltaObject:
  159. delta = true
  160. parent, ok := p.oiByOffset[oh.OffsetReference]
  161. if !ok {
  162. return plumbing.ErrObjectNotFound
  163. }
  164. ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
  165. parent.Children = append(parent.Children, ota)
  166. case plumbing.REFDeltaObject:
  167. delta = true
  168. parent, ok := p.oiByHash[oh.Reference]
  169. if !ok {
  170. // can't find referenced object in this pack file
  171. // this must be a "thin" pack.
  172. parent = &objectInfo{ //Placeholder parent
  173. SHA1: oh.Reference,
  174. ExternalRef: true, // mark as an external reference that must be resolved
  175. Type: plumbing.AnyObject,
  176. DiskType: plumbing.AnyObject,
  177. }
  178. p.oiByHash[oh.Reference] = parent
  179. }
  180. ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
  181. parent.Children = append(parent.Children, ota)
  182. default:
  183. ota = newBaseObject(oh.Offset, oh.Length, t)
  184. }
  185. _, crc, err := p.scanner.NextObject(buf)
  186. if err != nil {
  187. return err
  188. }
  189. ota.Crc32 = crc
  190. ota.Length = oh.Length
  191. data := buf.Bytes()
  192. if !delta {
  193. sha1, err := getSHA1(ota.Type, data)
  194. if err != nil {
  195. return err
  196. }
  197. ota.SHA1 = sha1
  198. p.oiByHash[ota.SHA1] = ota
  199. }
  200. if p.storage != nil && !delta {
  201. obj := new(plumbing.MemoryObject)
  202. obj.SetSize(oh.Length)
  203. obj.SetType(oh.Type)
  204. if _, err := obj.Write(data); err != nil {
  205. return err
  206. }
  207. if _, err := p.storage.SetEncodedObject(obj); err != nil {
  208. return err
  209. }
  210. }
  211. if delta && !p.scanner.IsSeekable {
  212. p.deltas[oh.Offset] = make([]byte, len(data))
  213. copy(p.deltas[oh.Offset], data)
  214. }
  215. p.oiByOffset[oh.Offset] = ota
  216. p.oi[i] = ota
  217. }
  218. return nil
  219. }
  220. func (p *Parser) resolveDeltas() error {
  221. for _, obj := range p.oi {
  222. content, err := p.get(obj)
  223. if err != nil {
  224. return err
  225. }
  226. if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil {
  227. return err
  228. }
  229. if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil {
  230. return err
  231. }
  232. if !obj.IsDelta() && len(obj.Children) > 0 {
  233. for _, child := range obj.Children {
  234. if _, err := p.resolveObject(child, content); err != nil {
  235. return err
  236. }
  237. }
  238. // Remove the delta from the cache.
  239. if obj.DiskType.IsDelta() && !p.scanner.IsSeekable {
  240. delete(p.deltas, obj.Offset)
  241. }
  242. }
  243. }
  244. return nil
  245. }
  246. func (p *Parser) get(o *objectInfo) (b []byte, err error) {
  247. var ok bool
  248. if !o.ExternalRef { // skip cache check for placeholder parents
  249. b, ok = p.cache.Get(o.Offset)
  250. }
  251. // If it's not on the cache and is not a delta we can try to find it in the
  252. // storage, if there's one. External refs must enter here.
  253. if !ok && p.storage != nil && !o.Type.IsDelta() {
  254. e, err := p.storage.EncodedObject(plumbing.AnyObject, o.SHA1)
  255. if err != nil {
  256. return nil, err
  257. }
  258. o.Type = e.Type()
  259. r, err := e.Reader()
  260. if err != nil {
  261. return nil, err
  262. }
  263. b = make([]byte, e.Size())
  264. if _, err = r.Read(b); err != nil {
  265. return nil, err
  266. }
  267. }
  268. if b != nil {
  269. return b, nil
  270. }
  271. if o.ExternalRef {
  272. // we were not able to resolve a ref in a thin pack
  273. return nil, ErrReferenceDeltaNotFound
  274. }
  275. var data []byte
  276. if o.DiskType.IsDelta() {
  277. base, err := p.get(o.Parent)
  278. if err != nil {
  279. return nil, err
  280. }
  281. data, err = p.resolveObject(o, base)
  282. if err != nil {
  283. return nil, err
  284. }
  285. } else {
  286. data, err = p.readData(o)
  287. if err != nil {
  288. return nil, err
  289. }
  290. }
  291. if len(o.Children) > 0 {
  292. p.cache.Put(o.Offset, data)
  293. }
  294. return data, nil
  295. }
  296. func (p *Parser) resolveObject(
  297. o *objectInfo,
  298. base []byte,
  299. ) ([]byte, error) {
  300. if !o.DiskType.IsDelta() {
  301. return nil, nil
  302. }
  303. data, err := p.readData(o)
  304. if err != nil {
  305. return nil, err
  306. }
  307. data, err = applyPatchBase(o, data, base)
  308. if err != nil {
  309. return nil, err
  310. }
  311. if p.storage != nil {
  312. obj := new(plumbing.MemoryObject)
  313. obj.SetSize(o.Size())
  314. obj.SetType(o.Type)
  315. if _, err := obj.Write(data); err != nil {
  316. return nil, err
  317. }
  318. if _, err := p.storage.SetEncodedObject(obj); err != nil {
  319. return nil, err
  320. }
  321. }
  322. return data, nil
  323. }
  324. func (p *Parser) readData(o *objectInfo) ([]byte, error) {
  325. if !p.scanner.IsSeekable && o.DiskType.IsDelta() {
  326. data, ok := p.deltas[o.Offset]
  327. if !ok {
  328. return nil, ErrDeltaNotCached
  329. }
  330. return data, nil
  331. }
  332. if _, err := p.scanner.SeekObjectHeader(o.Offset); err != nil {
  333. return nil, err
  334. }
  335. buf := new(bytes.Buffer)
  336. if _, _, err := p.scanner.NextObject(buf); err != nil {
  337. return nil, err
  338. }
  339. return buf.Bytes(), nil
  340. }
  341. func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) {
  342. patched, err := PatchDelta(base, data)
  343. if err != nil {
  344. return nil, err
  345. }
  346. if ota.SHA1 == plumbing.ZeroHash {
  347. ota.Type = ota.Parent.Type
  348. sha1, err := getSHA1(ota.Type, patched)
  349. if err != nil {
  350. return nil, err
  351. }
  352. ota.SHA1 = sha1
  353. ota.Length = int64(len(patched))
  354. }
  355. return patched, nil
  356. }
  357. func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) {
  358. hasher := plumbing.NewHasher(t, int64(len(data)))
  359. if _, err := hasher.Write(data); err != nil {
  360. return plumbing.ZeroHash, err
  361. }
  362. return hasher.Sum(), nil
  363. }
// objectInfo is the bookkeeping record the Parser keeps for each object of
// the packfile, plus placeholder parents for external references.
type objectInfo struct {
	// Offset is the object's offset in the packfile.
	Offset int64
	// Length is taken from the object header; for deltas it is replaced by
	// the length of the patched content once resolved.
	Length int64
	// Type starts as the on-disk type; for deltas it is replaced by the
	// parent's type once resolved.
	Type plumbing.ObjectType
	// DiskType is the type the object has in the packfile.
	DiskType plumbing.ObjectType
	ExternalRef bool // indicates this is an external reference in a thin pack file
	// Crc32 is the checksum reported by the scanner when the object was read.
	Crc32 uint32
	// Parent is the base object of a delta; nil for base objects.
	Parent *objectInfo
	// Children are the deltas that use this object as their base.
	Children []*objectInfo
	// SHA1 is the object hash; for deltas it stays zero until resolved.
	SHA1 plumbing.Hash
}
  375. func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo {
  376. return newDeltaObject(offset, length, t, nil)
  377. }
  378. func newDeltaObject(
  379. offset, length int64,
  380. t plumbing.ObjectType,
  381. parent *objectInfo,
  382. ) *objectInfo {
  383. obj := &objectInfo{
  384. Offset: offset,
  385. Length: length,
  386. Type: t,
  387. DiskType: t,
  388. Crc32: 0,
  389. Parent: parent,
  390. }
  391. return obj
  392. }
  393. func (o *objectInfo) IsDelta() bool {
  394. return o.Type.IsDelta()
  395. }
  396. func (o *objectInfo) Size() int64 {
  397. return o.Length
  398. }