scanner.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. package packfile
  2. import (
  3. "bufio"
  4. "bytes"
  5. "compress/zlib"
  6. "fmt"
  7. "hash"
  8. "hash/crc32"
  9. "io"
  10. stdioutil "io/ioutil"
  11. "sync"
  12. "gopkg.in/src-d/go-git.v4/plumbing"
  13. "gopkg.in/src-d/go-git.v4/utils/binary"
  14. "gopkg.in/src-d/go-git.v4/utils/ioutil"
  15. )
  16. var (
  17. // ErrEmptyPackfile is returned by ReadHeader when no data is found in the packfile
  18. ErrEmptyPackfile = NewError("empty packfile")
  19. // ErrBadSignature is returned by ReadHeader when the signature in the packfile is incorrect.
  20. ErrBadSignature = NewError("malformed pack file signature")
  21. // ErrUnsupportedVersion is returned by ReadHeader when the packfile version is
  22. // different than VersionSupported.
  23. ErrUnsupportedVersion = NewError("unsupported packfile version")
  24. // ErrSeekNotSupported returned if seek is not support
  25. ErrSeekNotSupported = NewError("not seek support")
  26. )
  27. // ObjectHeader contains the information related to the object, this information
  28. // is collected from the previous bytes to the content of the object.
  29. type ObjectHeader struct {
  30. Type plumbing.ObjectType
  31. Offset int64
  32. Length int64
  33. Reference plumbing.Hash
  34. OffsetReference int64
  35. }
  36. type Scanner struct {
  37. r reader
  38. zr readerResetter
  39. crc hash.Hash32
  40. // pendingObject is used to detect if an object has been read, or still
  41. // is waiting to be read
  42. pendingObject *ObjectHeader
  43. version, objects uint32
  44. // lsSeekable says if this scanner can do Seek or not, to have a Scanner
  45. // seekable a r implementing io.Seeker is required
  46. IsSeekable bool
  47. }
  48. // NewScanner returns a new Scanner based on a reader, if the given reader
  49. // implements io.ReadSeeker the Scanner will be also Seekable
  50. func NewScanner(r io.Reader) *Scanner {
  51. seeker, ok := r.(io.ReadSeeker)
  52. if !ok {
  53. seeker = &trackableReader{Reader: r}
  54. }
  55. crc := crc32.NewIEEE()
  56. return &Scanner{
  57. r: newTeeReader(newByteReadSeeker(seeker), crc),
  58. crc: crc,
  59. IsSeekable: ok,
  60. }
  61. }
  62. // Header reads the whole packfile header (signature, version and object count).
  63. // It returns the version and the object count and performs checks on the
  64. // validity of the signature and the version fields.
  65. func (s *Scanner) Header() (version, objects uint32, err error) {
  66. if s.version != 0 {
  67. return s.version, s.objects, nil
  68. }
  69. sig, err := s.readSignature()
  70. if err != nil {
  71. if err == io.EOF {
  72. err = ErrEmptyPackfile
  73. }
  74. return
  75. }
  76. if !s.isValidSignature(sig) {
  77. err = ErrBadSignature
  78. return
  79. }
  80. version, err = s.readVersion()
  81. s.version = version
  82. if err != nil {
  83. return
  84. }
  85. if !s.isSupportedVersion(version) {
  86. err = ErrUnsupportedVersion.AddDetails("%d", version)
  87. return
  88. }
  89. objects, err = s.readCount()
  90. s.objects = objects
  91. return
  92. }
  93. // readSignature reads an returns the signature field in the packfile.
  94. func (s *Scanner) readSignature() ([]byte, error) {
  95. var sig = make([]byte, 4)
  96. if _, err := io.ReadFull(s.r, sig); err != nil {
  97. return []byte{}, err
  98. }
  99. return sig, nil
  100. }
  101. // isValidSignature returns if sig is a valid packfile signature.
  102. func (s *Scanner) isValidSignature(sig []byte) bool {
  103. return bytes.Equal(sig, signature)
  104. }
  105. // readVersion reads and returns the version field of a packfile.
  106. func (s *Scanner) readVersion() (uint32, error) {
  107. return binary.ReadUint32(s.r)
  108. }
  109. // isSupportedVersion returns whether version v is supported by the parser.
  110. // The current supported version is VersionSupported, defined above.
  111. func (s *Scanner) isSupportedVersion(v uint32) bool {
  112. return v == VersionSupported
  113. }
  114. // readCount reads and returns the count of objects field of a packfile.
  115. func (s *Scanner) readCount() (uint32, error) {
  116. return binary.ReadUint32(s.r)
  117. }
  118. // SeekObjectHeader seeks to specified offset and returns the ObjectHeader
  119. // for the next object in the reader
  120. func (s *Scanner) SeekObjectHeader(offset int64) (*ObjectHeader, error) {
  121. // if seeking we assume that you are not interested in the header
  122. if s.version == 0 {
  123. s.version = VersionSupported
  124. }
  125. if _, err := s.r.Seek(offset, io.SeekStart); err != nil {
  126. return nil, err
  127. }
  128. h, err := s.nextObjectHeader()
  129. if err != nil {
  130. return nil, err
  131. }
  132. h.Offset = offset
  133. return h, nil
  134. }
  135. // NextObjectHeader returns the ObjectHeader for the next object in the reader
  136. func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) {
  137. if err := s.doPending(); err != nil {
  138. return nil, err
  139. }
  140. offset, err := s.r.Seek(0, io.SeekCurrent)
  141. if err != nil {
  142. return nil, err
  143. }
  144. h, err := s.nextObjectHeader()
  145. if err != nil {
  146. return nil, err
  147. }
  148. h.Offset = offset
  149. return h, nil
  150. }
  151. // nextObjectHeader returns the ObjectHeader for the next object in the reader
  152. // without the Offset field
  153. func (s *Scanner) nextObjectHeader() (*ObjectHeader, error) {
  154. defer s.Flush()
  155. s.crc.Reset()
  156. h := &ObjectHeader{}
  157. s.pendingObject = h
  158. var err error
  159. h.Offset, err = s.r.Seek(0, io.SeekCurrent)
  160. if err != nil {
  161. return nil, err
  162. }
  163. h.Type, h.Length, err = s.readObjectTypeAndLength()
  164. if err != nil {
  165. return nil, err
  166. }
  167. switch h.Type {
  168. case plumbing.OFSDeltaObject:
  169. no, err := binary.ReadVariableWidthInt(s.r)
  170. if err != nil {
  171. return nil, err
  172. }
  173. h.OffsetReference = h.Offset - no
  174. case plumbing.REFDeltaObject:
  175. var err error
  176. h.Reference, err = binary.ReadHash(s.r)
  177. if err != nil {
  178. return nil, err
  179. }
  180. }
  181. return h, nil
  182. }
  183. func (s *Scanner) doPending() error {
  184. if s.version == 0 {
  185. var err error
  186. s.version, s.objects, err = s.Header()
  187. if err != nil {
  188. return err
  189. }
  190. }
  191. return s.discardObjectIfNeeded()
  192. }
  193. func (s *Scanner) discardObjectIfNeeded() error {
  194. if s.pendingObject == nil {
  195. return nil
  196. }
  197. h := s.pendingObject
  198. n, _, err := s.NextObject(stdioutil.Discard)
  199. if err != nil {
  200. return err
  201. }
  202. if n != h.Length {
  203. return fmt.Errorf(
  204. "error discarding object, discarded %d, expected %d",
  205. n, h.Length,
  206. )
  207. }
  208. return nil
  209. }
  210. // ReadObjectTypeAndLength reads and returns the object type and the
  211. // length field from an object entry in a packfile.
  212. func (s *Scanner) readObjectTypeAndLength() (plumbing.ObjectType, int64, error) {
  213. t, c, err := s.readType()
  214. if err != nil {
  215. return t, 0, err
  216. }
  217. l, err := s.readLength(c)
  218. return t, l, err
  219. }
  220. func (s *Scanner) readType() (plumbing.ObjectType, byte, error) {
  221. var c byte
  222. var err error
  223. if c, err = s.r.ReadByte(); err != nil {
  224. return plumbing.ObjectType(0), 0, err
  225. }
  226. typ := parseType(c)
  227. return typ, c, nil
  228. }
  229. func parseType(b byte) plumbing.ObjectType {
  230. return plumbing.ObjectType((b & maskType) >> firstLengthBits)
  231. }
  232. // the length is codified in the last 4 bits of the first byte and in
  233. // the last 7 bits of subsequent bytes. Last byte has a 0 MSB.
  234. func (s *Scanner) readLength(first byte) (int64, error) {
  235. length := int64(first & maskFirstLength)
  236. c := first
  237. shift := firstLengthBits
  238. var err error
  239. for c&maskContinue > 0 {
  240. if c, err = s.r.ReadByte(); err != nil {
  241. return 0, err
  242. }
  243. length += int64(c&maskLength) << shift
  244. shift += lengthBits
  245. }
  246. return length, nil
  247. }
  248. // NextObject writes the content of the next object into the reader, returns
  249. // the number of bytes written, the CRC32 of the content and an error, if any
  250. func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err error) {
  251. defer s.crc.Reset()
  252. s.pendingObject = nil
  253. written, err = s.copyObject(w)
  254. s.Flush()
  255. crc32 = s.crc.Sum32()
  256. return
  257. }
  258. // ReadRegularObject reads and write a non-deltified object
  259. // from it zlib stream in an object entry in the packfile.
  260. func (s *Scanner) copyObject(w io.Writer) (n int64, err error) {
  261. if s.zr == nil {
  262. var zr io.ReadCloser
  263. zr, err = zlib.NewReader(s.r)
  264. if err != nil {
  265. return 0, fmt.Errorf("zlib initialization error: %s", err)
  266. }
  267. s.zr = zr.(readerResetter)
  268. } else {
  269. if err = s.zr.Reset(s.r, nil); err != nil {
  270. return 0, fmt.Errorf("zlib reset error: %s", err)
  271. }
  272. }
  273. defer ioutil.CheckClose(s.zr, &err)
  274. buf := byteSlicePool.Get().([]byte)
  275. n, err = io.CopyBuffer(w, s.zr, buf)
  276. byteSlicePool.Put(buf)
  277. return
  278. }
  279. var byteSlicePool = sync.Pool{
  280. New: func() interface{} {
  281. return make([]byte, 32*1024)
  282. },
  283. }
  284. // SeekFromStart sets a new offset from start, returns the old position before
  285. // the change.
  286. func (s *Scanner) SeekFromStart(offset int64) (previous int64, err error) {
  287. // if seeking we assume that you are not interested in the header
  288. if s.version == 0 {
  289. s.version = VersionSupported
  290. }
  291. previous, err = s.r.Seek(0, io.SeekCurrent)
  292. if err != nil {
  293. return -1, err
  294. }
  295. _, err = s.r.Seek(offset, io.SeekStart)
  296. return previous, err
  297. }
  298. // Checksum returns the checksum of the packfile
  299. func (s *Scanner) Checksum() (plumbing.Hash, error) {
  300. err := s.discardObjectIfNeeded()
  301. if err != nil {
  302. return plumbing.ZeroHash, err
  303. }
  304. return binary.ReadHash(s.r)
  305. }
  306. // Close reads the reader until io.EOF
  307. func (s *Scanner) Close() error {
  308. buf := byteSlicePool.Get().([]byte)
  309. _, err := io.CopyBuffer(stdioutil.Discard, s.r, buf)
  310. byteSlicePool.Put(buf)
  311. return err
  312. }
  313. // Flush finishes writing the buffer to crc hasher in case we are using
  314. // a teeReader. Otherwise it is a no-op.
  315. func (s *Scanner) Flush() error {
  316. tee, ok := s.r.(*teeReader)
  317. if ok {
  318. return tee.Flush()
  319. }
  320. return nil
  321. }
  322. type trackableReader struct {
  323. count int64
  324. io.Reader
  325. }
  326. // Read reads up to len(p) bytes into p.
  327. func (r *trackableReader) Read(p []byte) (n int, err error) {
  328. n, err = r.Reader.Read(p)
  329. r.count += int64(n)
  330. return
  331. }
  332. // Seek only supports io.SeekCurrent, any other operation fails
  333. func (r *trackableReader) Seek(offset int64, whence int) (int64, error) {
  334. if whence != io.SeekCurrent {
  335. return -1, ErrSeekNotSupported
  336. }
  337. return r.count, nil
  338. }
  339. func newByteReadSeeker(r io.ReadSeeker) *bufferedSeeker {
  340. return &bufferedSeeker{
  341. r: r,
  342. Reader: *bufio.NewReader(r),
  343. }
  344. }
  345. type bufferedSeeker struct {
  346. r io.ReadSeeker
  347. bufio.Reader
  348. }
  349. func (r *bufferedSeeker) Seek(offset int64, whence int) (int64, error) {
  350. if whence == io.SeekCurrent && offset == 0 {
  351. current, err := r.r.Seek(offset, whence)
  352. if err != nil {
  353. return current, err
  354. }
  355. return current - int64(r.Buffered()), nil
  356. }
  357. defer r.Reader.Reset(r.r)
  358. return r.r.Seek(offset, whence)
  359. }
  360. type readerResetter interface {
  361. io.ReadCloser
  362. zlib.Resetter
  363. }
  364. type reader interface {
  365. io.Reader
  366. io.ByteReader
  367. io.Seeker
  368. }
  369. type teeReader struct {
  370. reader
  371. w hash.Hash32
  372. bufWriter *bufio.Writer
  373. }
  374. func newTeeReader(r reader, h hash.Hash32) *teeReader {
  375. return &teeReader{
  376. reader: r,
  377. w: h,
  378. bufWriter: bufio.NewWriter(h),
  379. }
  380. }
  381. func (r *teeReader) Read(p []byte) (n int, err error) {
  382. r.Flush()
  383. n, err = r.reader.Read(p)
  384. if n > 0 {
  385. if n, err := r.w.Write(p[:n]); err != nil {
  386. return n, err
  387. }
  388. }
  389. return
  390. }
  391. func (r *teeReader) ReadByte() (b byte, err error) {
  392. b, err = r.reader.ReadByte()
  393. if err == nil {
  394. return b, r.bufWriter.WriteByte(b)
  395. }
  396. return
  397. }
  398. func (r *teeReader) Flush() (err error) {
  399. return r.bufWriter.Flush()
  400. }