Using the Go io package (TeeReader, MultiReader, MultiWriter, Pipe)

Posted by shakazulu on Sat, 25 Dec 2021 15:50:05 +0100

Background

Recently we have been working on file uploads. After a file is uploaded, we need to verify that the hash of the uploaded file matches the hash of the local file.
On the upload client, the file's hash could be computed separately in advance and then compared with the hash returned by the server after a successful upload. But that is clearly not ideal; it is better to upload and compute the hash at the same time.
I started with the Pipe() function of the io package, which I had rarely used before, and it turned out to be quite useful. I read through its implementation, then looked at the other helpers in the io package, and that eventually grew into this article.
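
For context, here is a minimal sketch of that scheme (the URL, content type, and function name are placeholders, not the real upload code): a goroutine copies the file into one end of an io.Pipe and into the hash at the same time via io.MultiWriter, while the HTTP client consumes the other end of the pipe as the request body, so uploading and hashing happen in a single pass over the file.

func uploadWithHash(path, url string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	h := md5.New()
	pr, pw := io.Pipe()

	// Writer side: stream the file into the pipe and into the hash at the same time.
	go func() {
		_, err := io.Copy(io.MultiWriter(pw, h), f)
		pw.CloseWithError(err) // a nil error closes the pipe cleanly, so the reader sees EOF
	}()

	// Reader side: the HTTP client drains the pipe as the request body.
	resp, err := http.Post(url, "application/octet-stream", pr)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	return fmt.Sprintf("%x", h.Sum(nil)), nil
}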

TeeReader

func TeeReader(r Reader, w Writer) Reader {
	return &teeReader{r, w}
}

The reader returned by this function reads from r and, as it reads, writes everything it has read to w, like the Unix tee command.

Source code

type teeReader struct {
	r Reader
	w Writer
}

func (t *teeReader) Read(p []byte) (n int, err error) {
	n, err = t.r.Read(p)
	if n > 0 {
		if n, err := t.w.Write(p[:n]); err != nil {
			return n, err
		}
	}
	return
}

Simple example

It can simplify code such as computing a file's hash (the usage here is not necessarily the best choice; it is just an example):

func TeeGetFileMD5(path string) (string, error) {
	file, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer file.Close()
	h := md5.New()
	tr := io.TeeReader(file, h)
	_, err = io.ReadAll(tr) // drain tr so every byte passes through h; io.Copy(io.Discard, tr) would avoid buffering the whole file
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", h.Sum(nil)), nil
}

MultiReader

func MultiReader(readers ...Reader) Reader {
	r := make([]Reader, len(readers))
	copy(r, readers)
	return &multiReader{r}
}

The returned reader reads from each of the given readers in sequence; only after all of them are exhausted does it return EOF (any other error stops reading early).

Source code

type multiReader struct {
	readers []Reader
}

func (mr *multiReader) Read(p []byte) (n int, err error) {
	for len(mr.readers) > 0 {
		// Optimization to flatten nested multiReaders (Issue 13558).
		if len(mr.readers) == 1 { // flatten a nested multiReader so its readers are handled directly
			if r, ok := mr.readers[0].(*multiReader); ok {
				mr.readers = r.readers
				continue
			}
		}
		n, err = mr.readers[0].Read(p)
		if err == EOF { // this reader is exhausted: drop it from the slice and move on to the next
			// Use eofReader instead of nil to avoid nil panic
			// after performing flatten (Issue 18232).
			mr.readers[0] = eofReader{} // permit earlier GC
			mr.readers = mr.readers[1:]
		}
		if n > 0 || err != EOF {
			if err == EOF && len(mr.readers) > 0 { // EOF from this reader, but others remain, so don't surface EOF yet
				// Don't return EOF yet. More readers remain.
				err = nil
			}
			return 
		}
	}
	return 0, EOF
}

Simple example

func tMultiReader() {
	r1 := bytes.NewReader([]byte("ABC"))
	r2 := bytes.NewReader([]byte("DEF"))
	reader := io.MultiReader(r1, r2)
	var buf = make([]byte, 1)
	for {
		n, err := reader.Read(buf)
		if err != nil {
			if err == io.EOF {
				return 
			}
			fmt.Println(err)
			return
		}
		fmt.Println(string(buf[:n])) // prints A, B, C, D, E, F (one byte per line)
	}
}
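
A more practical use, sketched below with a made-up header and file name, is stitching streams together without buffering, for example prepending a generated header to a file before handing the combined stream to anything that expects a single io.Reader.

func tPrependHeader() error {
	f, err := os.Open("data.csv") // hypothetical data file
	if err != nil {
		return err
	}
	defer f.Close()

	header := strings.NewReader("id,name,score\n")
	// The consumer sees one continuous stream: the header first, then the file contents.
	combined := io.MultiReader(header, f)

	_, err = io.Copy(os.Stdout, combined)
	return err
}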

MultiWriter

func MultiWriter(writers ...Writer) Writer {
	allWriters := make([]Writer, 0, len(writers))
	for _, w := range writers {
		if mw, ok := w.(*multiWriter); ok {
			allWriters = append(allWriters, mw.writers...)
		} else {
			allWriters = append(allWriters, w)
		}
	}
	return &multiWriter{allWriters}
}

Write once, write to many: every Write on the returned writer is duplicated to each of the writers passed in. When there are multiple output destinations, using it directly saves a lot of code.

Source code

type multiWriter struct {
	writers []Writer
}

func (t *multiWriter) Write(p []byte) (n int, err error) {
	for _, w := range t.writers {
		n, err = w.Write(p)
		if err != nil {
			return
		}
		if n != len(p) {
			err = ErrShortWrite
			return
		}
	}
	return len(p), nil
}

Simple example

func tMultiWriter() {
	var buf []byte
	w1 := bytes.NewBuffer(buf)
	w2 := bytes.NewBuffer(buf)
	writer := io.MultiWriter(w1, w2)
	_, err := writer.Write([]byte("123"))
	if err != nil {
		fmt.Println(err)
		return
	}

	w1Res, err := io.ReadAll(w1)
	fmt.Println(string(w1Res), err) // 123 <nil>
	w2Res, err := io.ReadAll(w2)
	fmt.Println(string(w2Res), err) // 123 <nil>
}
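
A common real-world use, sketched here with an arbitrary file name, is sending log output to the console and a log file at the same time:

func tLogToConsoleAndFile() error {
	logFile, err := os.Create("app.log") // hypothetical log file
	if err != nil {
		return err
	}
	defer logFile.Close()

	// Each log line is written to both destinations with a single Write call.
	log.SetOutput(io.MultiWriter(os.Stdout, logFile))
	log.Println("service started")
	return nil
}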

Pipe

func Pipe() (*PipeReader, *PipeWriter) {
	p := &pipe{
		wrCh: make(chan []byte),
		rdCh: make(chan int),
		done: make(chan struct{}),
	}
	return &PipeReader{p}, &PipeWriter{p}
}
type pipe struct {
	wrMu sync.Mutex // Serializes Write operations
	wrCh chan []byte
	rdCh chan int

	once sync.Once // Protects closing done
	done chan struct{}
	rerr onceError
	werr onceError
}

The PipeReader and PipeWriter returned by the function both have Close and CloseWithError methods, which stop further reads and writes by closing the done channel.

  • wrCh carries the data being written
  • rdCh reports how many bytes the reader consumed
  • once ensures done is closed only once
  • done signals that the pipe has been closed
  • rerr holds the error set when the read side is closed
  • werr holds the error set when the write side is closed

Source code

func (p *pipe) Write(b []byte) (n int, err error) {
	select {
	case <-p.done:
		return 0, p.writeCloseError()
	default:
		p.wrMu.Lock()
		defer p.wrMu.Unlock()
	}

	for once := true; once || len(b) > 0; once = false {
		select {
		case p.wrCh <- b:
			nw := <-p.rdCh
			b = b[nw:]
			n += nw
		case <-p.done:
			return n, p.writeCloseError()
		}
	}
	return n, nil
}

func (p *pipe) Read(b []byte) (n int, err error) {
	select {
	case <-p.done:
		return 0, p.readCloseError()
	default:
	}

	select {
	case bw := <-p.wrCh:
		nr := copy(b, bw)
		p.rdCh <- nr
		return nr, nil
	case <-p.done:
		return 0, p.readCloseError()
	}
}

Both the Read and Write methods begin with code like the following. Because select picks among ready cases at random, the done channel is checked up front so that an operation on a pipe that has already been closed returns its close error immediately, rather than racing against the data channels in the main select.

	select {
	case <-p.done:
		return 0, p.writeCloseError()
	default:
		p.wrMu.Lock()
		defer p.wrMu.Unlock()
	}
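
The effect of this check is easy to observe in a small sketch: once the pipe is closed, Write fails immediately with ErrClosedPipe instead of blocking, and Read reports EOF because the writer was closed without an error.

func tClosedPipe() {
	r, w := io.Pipe()
	w.Close()

	_, werr := w.Write([]byte("x"))
	fmt.Println(werr) // io: read/write on closed pipe

	_, rerr := r.Read(make([]byte, 1))
	fmt.Println(rerr) // EOF
}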

Simple example

func tPipe() {
	r, w := io.Pipe()
	go func() {
		for i := 0; i < 3; i++ {
			fmt.Println("write now!")
			n, err := w.Write([]byte("hello"))
			if err != nil {
				fmt.Println("write err:", err.Error())
			} else {
				fmt.Println("write end n:", n)
			}
		}
		w.Close()
	}()
	//time.Sleep(time.Second)
	b := make([]byte, 100)
	for {
		n, err := r.Read(b)
		if err != nil {
			if err != io.EOF {
				fmt.Println("read err:", err.Error())
			}
			break
		} else {
			fmt.Println("read:", string(b[:n]))
		}
	}
}
// write now!
// read: hello
// write end n: 5
// write now!
// read: hello
// write end n: 5
// write now!
// read: hello
// write end n: 5

Combination of TeeReader and MultiWriter

Example: copy a file and compute its hash at the same time

func copyFileWithHash() {
	f, dstF, hashW, err := getTestRW()
	if err != nil {
		fmt.Println(err)
		return
	}

	now := time.Now()
	defer func() {
		fmt.Println("Time consuming:", time.Now().Sub(now))
	}()

	multiW := io.MultiWriter(dstF, hashW)

	teeR := io.TeeReader(f, multiW)

	buf := make([]byte, 512)
	for {
		_, err := teeR.Read(buf)
		if err == io.EOF {
			break
		}
		utils.CheckErr(err)
	}

	fmt.Printf("file sha256: %x\n",  hashW.Sum(nil))
	// File size: 1840640
	// File sha256: b61ec80071fc414c44ff1a05f323679f9fc3e7caa2a68363019663fc16677568
	// Time: 15.881ms
}

func getTestRW() (f, dstF *os.File, shaW hash.Hash, err error) {
	f, err = os.Open(`E:\test\html report-1628662433.tar`)
	if err != nil {
		return
	}
	fInfo, err := f.Stat()
	if err != nil {
		return
	}
	fmt.Println("File size:", fInfo.Size())

	dstF, err = os.Create(`E:\test\1.tar`)
	if err != nil {
		return
	}

	shaW = sha256.New()
	return
}
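
The manual read loop in copyFileWithHash can also be replaced by a single io.Copy that drains the TeeReader; here is a sketch reusing the getTestRW helper above.

func copyFileWithHashViaCopy() error {
	f, dstF, hashW, err := getTestRW()
	if err != nil {
		return err
	}
	defer f.Close()
	defer dstF.Close()

	// io.Copy drives the TeeReader to EOF; every byte it reads is also written
	// to the destination file and the hash through the MultiWriter.
	teeR := io.TeeReader(f, io.MultiWriter(dstF, hashW))
	if _, err := io.Copy(io.Discard, teeR); err != nil {
		return err
	}

	fmt.Printf("file sha256: %x\n", hashW.Sum(nil))
	return nil
}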

Summary

These helpers in the io package are quite useful, and their implementations are not complicated; if you are interested, read through the source of each one. This article is only a brief introduction. In real development you can get by without them, it is just a little more cumbersome.

  • TeeReader: two things at once (writes while it reads)
  • MultiReader: first come, first served (reads each source in sequence)
  • MultiWriter: share and share alike (one write goes to every writer)
  • Pipe: strict turn-taking (write -> read -> write -> read -> ...)

Topics: Go