Skip to content

Commit

Permalink
Split into multiple files (#3)
Browse files Browse the repository at this point in the history
split files
  • Loading branch information
pjdufour authored Aug 24, 2019
1 parent b23c3b2 commit a238038
Show file tree
Hide file tree
Showing 20 changed files with 490 additions and 82 deletions.
212 changes: 181 additions & 31 deletions cmd/grw/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@
package main

import (
"bufio"
"fmt"
"io"
"os"
"os/signal"
"strconv"
"strings"
"sync"
"syscall"
Expand Down Expand Up @@ -46,7 +48,10 @@ const (
flagOutputBufferSize string = "output-buffer-size"
flagOutputAppend string = "output-append"
flagOutputOverwrite string = "output-overwrite"
flagSplitLines string = "split-lines"
flagVerbose string = "verbose"

NumberReplacementCharacter string = "#"
)

func initFlags(flag *pflag.FlagSet) {
Expand All @@ -65,9 +70,49 @@ func initFlags(flag *pflag.FlagSet) {
flag.BoolP(flagOutputAppend, "a", false, "append to output files")
flag.BoolP(flagOutputOverwrite, "o", false, "overwrite output if it already exists")

flag.IntP(
flagSplitLines,
"l",
-1,
fmt.Sprintf("split output by a number of lines, replaces %q in output uri with file number starting with 1.", NumberReplacementCharacter),
)

flag.BoolP(flagVerbose, "v", false, "verbose output")
}

func initViper(flag *pflag.FlagSet) (*viper.Viper, error) {
v := viper.New()
err := v.BindPFlags(flag)
if err != nil {
return nil, err
}
v.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
v.AutomaticEnv()
return v, nil
}

func checkConfig(args []string, v *viper.Viper) error {

if len(args) != 2 {
return fmt.Errorf("expecting 2 arguments, found %d arguments", len(args))
}

outputUri := args[1]

splitLines := v.GetInt(flagSplitLines)
if splitLines > 0 {

if !strings.Contains(outputUri, NumberReplacementCharacter) {
return fmt.Errorf(
"when splitting by lines, you must include the number replacement character (%q) in the output uri",
NumberReplacementCharacter,
)
}

}
return nil
}

func main() {

rootCommand := cobra.Command{
Expand All @@ -85,16 +130,14 @@ func main() {

flag := cmd.Flags()

v := viper.New()
err = v.BindPFlags(flag)
v, err := initViper(flag)
if err != nil {
return err
return errors.Wrap(err, "error initializing viper")
}
v.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
v.AutomaticEnv()

if len(args) != 2 {
return fmt.Errorf("expecting 2 arguments, found %d arguments", len(args))
err = checkConfig(args, v)
if err != nil {
return err
}

inputUri := args[0]
Expand Down Expand Up @@ -148,9 +191,12 @@ func main() {
}

outputCompression := v.GetString(flagOutputCompression)
outputOverwrite := v.GetBool(flagOutputOverwrite)
outputAppend := v.GetBool(flagOutputAppend)
outputBufferSize := v.GetInt(flagOutputBufferSize)

splitLines := v.GetInt(flagSplitLines)

var outputWriter grw.ByteWriteCloser
var outputBuffer grw.Buffer

Expand All @@ -165,7 +211,24 @@ func main() {
return errors.Wrapf(err, "error opening bytes buffer for %q", outputUri)
}
} else {
outputWriter, err = grw.WriteToResource(outputUri, outputCompression, outputAppend, s3Client)
uri := outputUri
if splitLines > 0 {
uri = strings.ReplaceAll(outputUri, NumberReplacementCharacter, "1")
}
if (!outputOverwrite) && (!outputAppend) {
exists, _, err := grw.Stat(uri)
if err != nil {
return errors.Wrapf(err, "error statting uri %q", uri)
}
if exists {
return fmt.Errorf("file already exists at uri %q and neither append or overwrite is set", uri)
}
}
outputWriter, err = grw.WriteToResource(
uri,
outputCompression,
outputAppend,
s3Client)
if err != nil {
return errors.Wrapf(err, "error opening resource at uri %q", outputUri)
}
Expand Down Expand Up @@ -197,38 +260,124 @@ func main() {
}()

brokenPipe := false
go func() {
eof := false
for (!updateGracefulShutdown(nil)) && (!eof) && (!brokenPipe) {
if splitLines > 0 {
go func() {
eof := false

b := make([]byte, outputBufferSize)
n, err := inputReader.Read(b)
if err != nil {
if err == io.EOF {
eof = true
} else {
fmt.Fprint(os.Stderr, errors.Wrapf(err, "error reading from resource at uri %q", inputUri).Error())
scanner := bufio.NewScanner(inputReader)
files := 1
lines := 0

for (!updateGracefulShutdown(nil)) && (!eof) && (!brokenPipe) && scanner.Scan() {

if lines == splitLines {

err := outputWriter.Flush()
if err != nil {
fmt.Fprint(os.Stderr, errors.Wrapf(err, "error flushing resource at uri %q", strings.ReplaceAll(outputUri, NumberReplacementCharacter, strconv.Itoa(files))).Error())
break
}

err = outputWriter.Close()
if err != nil {
fmt.Fprint(os.Stderr, errors.Wrapf(err, "error closing resource at uri %q", strings.ReplaceAll(outputUri, NumberReplacementCharacter, strconv.Itoa(files))).Error())
break
}

// increment files number
files++

uri := strings.ReplaceAll(outputUri, NumberReplacementCharacter, strconv.Itoa(files))

if (!outputOverwrite) && (!outputAppend) {
exists, _, err := grw.Stat(uri)
if err != nil {
fmt.Fprint(os.Stderr, errors.Wrapf(err, "error statting uri %q", uri).Error())
break
}
if exists {
fmt.Fprint(os.Stderr, fmt.Errorf("file already exists at uri %q and neither append or overwrite is set", uri).Error())
break
}
}

ow, err := grw.WriteToResource(
uri,
outputCompression,
outputAppend,
s3Client)
if err != nil {
fmt.Fprint(os.Stderr, errors.Wrapf(err, "error opening resource at uri %q", outputUri).Error())
break
}

outputWriter = ow

lines = 0
}

line := scanner.Text()

if gracefulShutdown {
break
}

_, err = outputWriter.WriteLine(line)
if err != nil {
if perr, ok := err.(*os.PathError); ok {
if perr.Err == syscall.EPIPE {
brokenPipe = true
break
}
}
fmt.Fprint(os.Stderr, errors.Wrapf(err, "error writing to resource at uri %q", outputUri).Error())
break
}

// increment counter
lines++
}

if gracefulShutdown {
break
if err := scanner.Err(); err != nil {
fmt.Fprint(os.Stderr, errors.Wrapf(err, "error reading from resource at uri %q", inputUri).Error())
}

_, err = outputWriter.Write(b[:n])
if err != nil {
if perr, ok := err.(*os.PathError); ok {
if perr.Err == syscall.EPIPE {
brokenPipe = true
break
wg.Done()
}()
} else {
go func() {
eof := false
for (!updateGracefulShutdown(nil)) && (!eof) && (!brokenPipe) {

b := make([]byte, outputBufferSize)
n, err := inputReader.Read(b)
if err != nil {
if err == io.EOF {
eof = true
} else {
fmt.Fprint(os.Stderr, errors.Wrapf(err, "error reading from resource at uri %q", inputUri).Error())
}
}
fmt.Fprint(os.Stderr, errors.Wrapf(err, "error writing to resource at uri %q", outputUri).Error())
}

}
wg.Done()
}()
if gracefulShutdown {
break
}

_, err = outputWriter.Write(b[:n])
if err != nil {
if perr, ok := err.(*os.PathError); ok {
if perr.Err == syscall.EPIPE {
brokenPipe = true
break
}
}
fmt.Fprint(os.Stderr, errors.Wrapf(err, "error writing to resource at uri %q", outputUri).Error())
}

}
wg.Done()
}()
}

wg.Wait() // wait until done writing or received signal for graceful shutdown

Expand Down Expand Up @@ -259,6 +408,7 @@ func main() {
if verbose && !brokenPipe {
fmt.Println("Done in " + elapsed.String())
}

return nil
},
}
Expand Down
2 changes: 1 addition & 1 deletion examples/c/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ main(int argc, char **argv) {

printf("Schemes: %s\n", Schemes());

char *input_uri = "https://raw.githubusercontent.com/spatialcurrent/go-reader-writer/master/test/doc.txt";
char *input_uri = "https://raw.githubusercontent.com/spatialcurrent/go-reader-writer/master/testdata/doc.txt";
char *input_alg = "none";
char *output_string;

Expand Down
2 changes: 1 addition & 1 deletion examples/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ int main(int argc, char **argv) {

// Since Go requires non-const values, we must define our parameters as variables
// https://stackoverflow.com/questions/4044255/passing-a-string-literal-to-a-function-that-takes-a-stdstring
std::string input_uri("https://raw.githubusercontent.com/spatialcurrent/go-reader-writer/master/test/doc.txt.bz2");
std::string input_uri("https://raw.githubusercontent.com/spatialcurrent/go-reader-writer/master/testdata/doc.txt.bz2");
std::string input_alg("bzip2");
char *output_char_ptr;

Expand Down
2 changes: 1 addition & 1 deletion examples/python/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

# Define input and output variables
# Output must be a ctypec_char_p
input_uri = "https://raw.githubusercontent.com/spatialcurrent/go-reader-writer/master/test/doc.txt";
input_uri = "https://raw.githubusercontent.com/spatialcurrent/go-reader-writer/master/testdata/doc.txt";
input_alg = "none"
output_string_pointer = c_char_p()

Expand Down
1 change: 1 addition & 0 deletions pkg/grw/Buffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ type Buffer interface {
WriteString(s string) (n int, err error)
Bytes() []byte
String() string
Len() int
}
4 changes: 2 additions & 2 deletions pkg/grw/BuilderOpen.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ import (

func (b *Builder) Open() (ByteReadCloser, *Metadata, error) {

if b.uri == "stdin" {
brc, err := ReadStdin(b.uri)
if b.uri == "stdin" || b.uri == "-" {
brc, err := ReadStdin(b.alg)
return brc, nil, err
}

Expand Down
42 changes: 42 additions & 0 deletions pkg/grw/Mkdirs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// =================================================================
//
// Copyright (C) 2019 Spatial Current, Inc. - All Rights Reserved
// Released as open source under the MIT License. See LICENSE file.
//
// =================================================================

package grw

import (
"os"
"path/filepath"

homedir "github.com/mitchellh/go-homedir"
"github.com/pkg/errors"
)

// Mkdirs create the directory at the given path and all parent directories, if they do not exist.
// Mkdirs expands the home directory and resolves the path given.
func Mkdirs(p string) error {

if len(p) == 0 {
return ErrPathMissing
}

pathExpanded, err := homedir.Expand(p)
if err != nil {
return errors.Wrapf(err, "error expanding file path %q", p)
}

pathAbsolute, err := filepath.Abs(pathExpanded)
if err != nil {
return errors.Wrapf(err, "error resolving file path %q", pathAbsolute)
}

err = os.MkdirAll(pathAbsolute, 0750)
if err != nil {
return errors.Wrapf(err, "error creating parent directories for %q", p)
}

return nil
}
Loading

0 comments on commit a238038

Please sign in to comment.