| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487 |
- package packfile
- import (
- "bufio"
- "bytes"
- "compress/zlib"
- "fmt"
- "hash"
- "hash/crc32"
- "io"
- stdioutil "io/ioutil"
- "sync"
- "gopkg.in/src-d/go-git.v4/plumbing"
- "gopkg.in/src-d/go-git.v4/utils/binary"
- "gopkg.in/src-d/go-git.v4/utils/ioutil"
- )
- var (
- // ErrEmptyPackfile is returned by ReadHeader when no data is found in the packfile
- ErrEmptyPackfile = NewError("empty packfile")
- // ErrBadSignature is returned by ReadHeader when the signature in the packfile is incorrect.
- ErrBadSignature = NewError("malformed pack file signature")
- // ErrUnsupportedVersion is returned by ReadHeader when the packfile version is
- // different than VersionSupported.
- ErrUnsupportedVersion = NewError("unsupported packfile version")
- // ErrSeekNotSupported returned if seek is not support
- ErrSeekNotSupported = NewError("not seek support")
- )
- // ObjectHeader contains the information related to the object, this information
- // is collected from the previous bytes to the content of the object.
- type ObjectHeader struct {
- Type plumbing.ObjectType
- Offset int64
- Length int64
- Reference plumbing.Hash
- OffsetReference int64
- }
- type Scanner struct {
- r reader
- zr readerResetter
- crc hash.Hash32
- // pendingObject is used to detect if an object has been read, or still
- // is waiting to be read
- pendingObject *ObjectHeader
- version, objects uint32
- // lsSeekable says if this scanner can do Seek or not, to have a Scanner
- // seekable a r implementing io.Seeker is required
- IsSeekable bool
- }
- // NewScanner returns a new Scanner based on a reader, if the given reader
- // implements io.ReadSeeker the Scanner will be also Seekable
- func NewScanner(r io.Reader) *Scanner {
- seeker, ok := r.(io.ReadSeeker)
- if !ok {
- seeker = &trackableReader{Reader: r}
- }
- crc := crc32.NewIEEE()
- return &Scanner{
- r: newTeeReader(newByteReadSeeker(seeker), crc),
- crc: crc,
- IsSeekable: ok,
- }
- }
- // Header reads the whole packfile header (signature, version and object count).
- // It returns the version and the object count and performs checks on the
- // validity of the signature and the version fields.
- func (s *Scanner) Header() (version, objects uint32, err error) {
- if s.version != 0 {
- return s.version, s.objects, nil
- }
- sig, err := s.readSignature()
- if err != nil {
- if err == io.EOF {
- err = ErrEmptyPackfile
- }
- return
- }
- if !s.isValidSignature(sig) {
- err = ErrBadSignature
- return
- }
- version, err = s.readVersion()
- s.version = version
- if err != nil {
- return
- }
- if !s.isSupportedVersion(version) {
- err = ErrUnsupportedVersion.AddDetails("%d", version)
- return
- }
- objects, err = s.readCount()
- s.objects = objects
- return
- }
- // readSignature reads an returns the signature field in the packfile.
- func (s *Scanner) readSignature() ([]byte, error) {
- var sig = make([]byte, 4)
- if _, err := io.ReadFull(s.r, sig); err != nil {
- return []byte{}, err
- }
- return sig, nil
- }
- // isValidSignature returns if sig is a valid packfile signature.
- func (s *Scanner) isValidSignature(sig []byte) bool {
- return bytes.Equal(sig, signature)
- }
- // readVersion reads and returns the version field of a packfile.
- func (s *Scanner) readVersion() (uint32, error) {
- return binary.ReadUint32(s.r)
- }
- // isSupportedVersion returns whether version v is supported by the parser.
- // The current supported version is VersionSupported, defined above.
- func (s *Scanner) isSupportedVersion(v uint32) bool {
- return v == VersionSupported
- }
- // readCount reads and returns the count of objects field of a packfile.
- func (s *Scanner) readCount() (uint32, error) {
- return binary.ReadUint32(s.r)
- }
- // SeekObjectHeader seeks to specified offset and returns the ObjectHeader
- // for the next object in the reader
- func (s *Scanner) SeekObjectHeader(offset int64) (*ObjectHeader, error) {
- // if seeking we assume that you are not interested in the header
- if s.version == 0 {
- s.version = VersionSupported
- }
- if _, err := s.r.Seek(offset, io.SeekStart); err != nil {
- return nil, err
- }
- h, err := s.nextObjectHeader()
- if err != nil {
- return nil, err
- }
- h.Offset = offset
- return h, nil
- }
- // NextObjectHeader returns the ObjectHeader for the next object in the reader
- func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) {
- if err := s.doPending(); err != nil {
- return nil, err
- }
- offset, err := s.r.Seek(0, io.SeekCurrent)
- if err != nil {
- return nil, err
- }
- h, err := s.nextObjectHeader()
- if err != nil {
- return nil, err
- }
- h.Offset = offset
- return h, nil
- }
- // nextObjectHeader returns the ObjectHeader for the next object in the reader
- // without the Offset field
- func (s *Scanner) nextObjectHeader() (*ObjectHeader, error) {
- defer s.Flush()
- s.crc.Reset()
- h := &ObjectHeader{}
- s.pendingObject = h
- var err error
- h.Offset, err = s.r.Seek(0, io.SeekCurrent)
- if err != nil {
- return nil, err
- }
- h.Type, h.Length, err = s.readObjectTypeAndLength()
- if err != nil {
- return nil, err
- }
- switch h.Type {
- case plumbing.OFSDeltaObject:
- no, err := binary.ReadVariableWidthInt(s.r)
- if err != nil {
- return nil, err
- }
- h.OffsetReference = h.Offset - no
- case plumbing.REFDeltaObject:
- var err error
- h.Reference, err = binary.ReadHash(s.r)
- if err != nil {
- return nil, err
- }
- }
- return h, nil
- }
- func (s *Scanner) doPending() error {
- if s.version == 0 {
- var err error
- s.version, s.objects, err = s.Header()
- if err != nil {
- return err
- }
- }
- return s.discardObjectIfNeeded()
- }
- func (s *Scanner) discardObjectIfNeeded() error {
- if s.pendingObject == nil {
- return nil
- }
- h := s.pendingObject
- n, _, err := s.NextObject(stdioutil.Discard)
- if err != nil {
- return err
- }
- if n != h.Length {
- return fmt.Errorf(
- "error discarding object, discarded %d, expected %d",
- n, h.Length,
- )
- }
- return nil
- }
- // ReadObjectTypeAndLength reads and returns the object type and the
- // length field from an object entry in a packfile.
- func (s *Scanner) readObjectTypeAndLength() (plumbing.ObjectType, int64, error) {
- t, c, err := s.readType()
- if err != nil {
- return t, 0, err
- }
- l, err := s.readLength(c)
- return t, l, err
- }
- func (s *Scanner) readType() (plumbing.ObjectType, byte, error) {
- var c byte
- var err error
- if c, err = s.r.ReadByte(); err != nil {
- return plumbing.ObjectType(0), 0, err
- }
- typ := parseType(c)
- return typ, c, nil
- }
- func parseType(b byte) plumbing.ObjectType {
- return plumbing.ObjectType((b & maskType) >> firstLengthBits)
- }
- // the length is codified in the last 4 bits of the first byte and in
- // the last 7 bits of subsequent bytes. Last byte has a 0 MSB.
- func (s *Scanner) readLength(first byte) (int64, error) {
- length := int64(first & maskFirstLength)
- c := first
- shift := firstLengthBits
- var err error
- for c&maskContinue > 0 {
- if c, err = s.r.ReadByte(); err != nil {
- return 0, err
- }
- length += int64(c&maskLength) << shift
- shift += lengthBits
- }
- return length, nil
- }
- // NextObject writes the content of the next object into the reader, returns
- // the number of bytes written, the CRC32 of the content and an error, if any
- func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err error) {
- defer s.crc.Reset()
- s.pendingObject = nil
- written, err = s.copyObject(w)
- s.Flush()
- crc32 = s.crc.Sum32()
- return
- }
- // ReadRegularObject reads and write a non-deltified object
- // from it zlib stream in an object entry in the packfile.
- func (s *Scanner) copyObject(w io.Writer) (n int64, err error) {
- if s.zr == nil {
- var zr io.ReadCloser
- zr, err = zlib.NewReader(s.r)
- if err != nil {
- return 0, fmt.Errorf("zlib initialization error: %s", err)
- }
- s.zr = zr.(readerResetter)
- } else {
- if err = s.zr.Reset(s.r, nil); err != nil {
- return 0, fmt.Errorf("zlib reset error: %s", err)
- }
- }
- defer ioutil.CheckClose(s.zr, &err)
- buf := byteSlicePool.Get().([]byte)
- n, err = io.CopyBuffer(w, s.zr, buf)
- byteSlicePool.Put(buf)
- return
- }
// byteSlicePool hands out 32 KiB byte slices used as scratch buffers for
// io.CopyBuffer.
var byteSlicePool = sync.Pool{
	New: func() interface{} {
		buf := make([]byte, 32<<10)
		return buf
	},
}
- // SeekFromStart sets a new offset from start, returns the old position before
- // the change.
- func (s *Scanner) SeekFromStart(offset int64) (previous int64, err error) {
- // if seeking we assume that you are not interested in the header
- if s.version == 0 {
- s.version = VersionSupported
- }
- previous, err = s.r.Seek(0, io.SeekCurrent)
- if err != nil {
- return -1, err
- }
- _, err = s.r.Seek(offset, io.SeekStart)
- return previous, err
- }
- // Checksum returns the checksum of the packfile
- func (s *Scanner) Checksum() (plumbing.Hash, error) {
- err := s.discardObjectIfNeeded()
- if err != nil {
- return plumbing.ZeroHash, err
- }
- return binary.ReadHash(s.r)
- }
- // Close reads the reader until io.EOF
- func (s *Scanner) Close() error {
- buf := byteSlicePool.Get().([]byte)
- _, err := io.CopyBuffer(stdioutil.Discard, s.r, buf)
- byteSlicePool.Put(buf)
- return err
- }
- // Flush finishes writing the buffer to crc hasher in case we are using
- // a teeReader. Otherwise it is a no-op.
- func (s *Scanner) Flush() error {
- tee, ok := s.r.(*teeReader)
- if ok {
- return tee.Flush()
- }
- return nil
- }
- type trackableReader struct {
- count int64
- io.Reader
- }
- // Read reads up to len(p) bytes into p.
- func (r *trackableReader) Read(p []byte) (n int, err error) {
- n, err = r.Reader.Read(p)
- r.count += int64(n)
- return
- }
- // Seek only supports io.SeekCurrent, any other operation fails
- func (r *trackableReader) Seek(offset int64, whence int) (int64, error) {
- if whence != io.SeekCurrent {
- return -1, ErrSeekNotSupported
- }
- return r.count, nil
- }
// newByteReadSeeker wraps r in a bufferedSeeker, which adds buffering (and
// with it ReadByte) on top of r while keeping Seek usable.
func newByteReadSeeker(r io.ReadSeeker) *bufferedSeeker {
	return &bufferedSeeker{
		r:      r,
		Reader: *bufio.NewReader(r),
	}
}

// bufferedSeeker is a buffered reader over an io.ReadSeeker whose Seek takes
// the bytes sitting in the buffer into account.
type bufferedSeeker struct {
	// r is the underlying reader; its position runs ahead of the logical
	// position by the number of buffered bytes.
	r io.ReadSeeker
	bufio.Reader
}

// Seek implements io.Seeker in terms of the logical position, i.e. the
// position of the next byte a Read would return.
//
// Seek(0, io.SeekCurrent) only reports the position and keeps the buffer.
// Any other successful seek drops the buffered data. A relative seek is
// corrected by the number of buffered bytes, because the underlying reader
// has already been advanced past them. When the underlying seek fails the
// buffer is left untouched, so no data is lost.
func (r *bufferedSeeker) Seek(offset int64, whence int) (int64, error) {
	if whence == io.SeekCurrent {
		if offset == 0 {
			current, err := r.r.Seek(0, io.SeekCurrent)
			if err != nil {
				return current, err
			}

			return current - int64(r.Buffered()), nil
		}

		// Translate the offset from the logical to the underlying position.
		offset -= int64(r.Buffered())
	}

	pos, err := r.r.Seek(offset, whence)
	if err != nil {
		return pos, err
	}

	// The buffered bytes belong to the old position; drop them.
	r.Reader.Reset(r.r)
	return pos, nil
}
// readerResetter is a zlib reader that can be closed and then reused on a
// new source through Reset.
type readerResetter interface {
	io.Reader
	io.Closer
	zlib.Resetter
}
// reader is what the Scanner needs from its source: bulk reads, single-byte
// reads and seeking.
type reader interface {
	io.ReadSeeker
	io.ByteReader
}

// teeReader is a reader that mirrors every byte read through it into a
// hash. Single bytes go through a buffered writer; bulk reads are written to
// the hash directly.
type teeReader struct {
	reader
	// w receives a copy of everything that is read.
	w hash.Hash32
	// bufWriter batches the bytes read one at a time with ReadByte.
	bufWriter *bufio.Writer
}

// newTeeReader returns a teeReader that reads from r and mirrors the bytes
// into h.
func newTeeReader(r reader, h hash.Hash32) *teeReader {
	tee := &teeReader{reader: r, w: h}
	tee.bufWriter = bufio.NewWriter(h)
	return tee
}

// Read reads into p and writes the bytes obtained to the hash. Bytes still
// buffered from ReadByte are flushed first so the hash sees them in order.
func (r *teeReader) Read(p []byte) (int, error) {
	r.Flush()

	n, err := r.reader.Read(p)
	if n == 0 {
		return n, err
	}

	if written, werr := r.w.Write(p[:n]); werr != nil {
		return written, werr
	}

	return n, err
}

// ReadByte reads a single byte and queues it in the buffered writer.
func (r *teeReader) ReadByte() (b byte, err error) {
	if b, err = r.reader.ReadByte(); err != nil {
		return b, err
	}

	return b, r.bufWriter.WriteByte(b)
}

// Flush writes the bytes queued by ReadByte to the hash.
func (r *teeReader) Flush() (err error) {
	err = r.bufWriter.Flush()
	return err
}
|