From 49b9f7bc926f9645e203294355ea69bde5825a8f Mon Sep 17 00:00:00 2001 From: Liang Ding Date: Fri, 27 May 2022 12:56:23 +0800 Subject: [PATCH] =?UTF-8?q?:art:=20=E6=8F=92=E5=85=A5=E8=BE=83=E5=A4=A7?= =?UTF-8?q?=E7=9A=84=E8=B5=84=E6=BA=90=E6=96=87=E4=BB=B6=E6=97=B6=E5=86=85?= =?UTF-8?q?=E5=AD=98=E5=8D=A0=E7=94=A8=E8=BE=83=E5=A4=A7=20https://github.?= =?UTF-8?q?com/siyuan-note/siyuan/issues/5023?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/model/osssync.go | 67 +------------------------------ kernel/model/sync.go | 2 +- kernel/model/upload.go | 57 +++++++++++++++----------- kernel/server/serve.go | 2 +- kernel/sql/aseet.go | 6 +-- kernel/sql/database.go | 18 ++++----- kernel/util/etag.go | 88 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 137 insertions(+), 103 deletions(-) create mode 100644 kernel/util/etag.go diff --git a/kernel/model/osssync.go b/kernel/model/osssync.go index 10d81e7f9..49209273d 100644 --- a/kernel/model/osssync.go +++ b/kernel/model/osssync.go @@ -17,13 +17,9 @@ package model import ( - "bytes" "context" - "crypto/sha1" - "encoding/base64" "errors" "fmt" - "io" "io/fs" "os" "path" @@ -662,7 +658,7 @@ func localUpsertRemoveListOSS(localDirPath string, cloudFileList map[string]*Clo return nil } - localHash, hashErr := GetEtag(path) + localHash, hashErr := util.GetEtag(path) if nil != hashErr { util.LogErrorf("get local file [%s] etag failed: %s", path, hashErr) return nil @@ -696,7 +692,7 @@ func cloudUpsertRemoveListOSS(localDirPath string, cloudFileList map[string]*Clo continue } - localHash, hashErr := GetEtag(localCheckPath) + localHash, hashErr := util.GetEtag(localCheckPath) if nil != hashErr { util.LogErrorf("get local file [%s] hash failed: %s", localCheckPath, hashErr) err = hashErr @@ -760,62 +756,3 @@ func putFileToCloud(filePath, key, upToken string) (err error) { } return } - -// 以下是七牛云 Hash 算法实现 https://github.com/qiniu/qetag/blob/master/qetag.go - -func 
GetEtag(filename string) (etag string, err error) { - f, err := os.Open(filename) - if err != nil { - return - } - defer f.Close() - - fi, err := f.Stat() - if err != nil { - return - } - - fsize := fi.Size() - blockCnt := BlockCount(fsize) - sha1Buf := make([]byte, 0, 21) - - if blockCnt <= 1 { // file size <= 4M - sha1Buf = append(sha1Buf, 0x16) - sha1Buf, err = CalSha1(sha1Buf, f) - if err != nil { - return - } - } else { // file size > 4M - sha1Buf = append(sha1Buf, 0x96) - sha1BlockBuf := make([]byte, 0, blockCnt*20) - for i := 0; i < blockCnt; i++ { - body := io.LimitReader(f, BLOCK_SIZE) - sha1BlockBuf, err = CalSha1(sha1BlockBuf, body) - if err != nil { - return - } - } - sha1Buf, _ = CalSha1(sha1Buf, bytes.NewReader(sha1BlockBuf)) - } - etag = base64.URLEncoding.EncodeToString(sha1Buf) - return -} - -const ( - BLOCK_BITS = 22 // Indicate that the blocksize is 4M - BLOCK_SIZE = 1 << BLOCK_BITS -) - -func BlockCount(fsize int64) int { - return int((fsize + (BLOCK_SIZE - 1)) >> BLOCK_BITS) -} - -func CalSha1(b []byte, r io.Reader) ([]byte, error) { - - h := sha1.New() - _, err := io.Copy(h, r) - if err != nil { - return nil, err - } - return h.Sum(b), nil -} diff --git a/kernel/model/sync.go b/kernel/model/sync.go index 87daae260..53cae0c43 100644 --- a/kernel/model/sync.go +++ b/kernel/model/sync.go @@ -571,7 +571,7 @@ func genCloudIndex(localDirPath string, excludes map[string]bool) (err error) { return nil } - hash, hashErr := GetEtag(path) + hash, hashErr := util.GetEtag(path) if nil != hashErr { util.LogErrorf("get file [%s] hash failed: %s", path, hashErr) return hashErr diff --git a/kernel/model/upload.go b/kernel/model/upload.go index 0b8c78b49..20bd1cdb7 100644 --- a/kernel/model/upload.go +++ b/kernel/model/upload.go @@ -17,9 +17,7 @@ package model import ( - "crypto/sha256" "errors" - "fmt" "io" "os" "path" @@ -58,20 +56,22 @@ func InsertLocalAssets(id string, assetPaths []string) (succMap map[string]inter continue } - var f *os.File - f, err = 
os.Open(p) - if nil != err { + fi, statErr := os.Stat(p) + if nil != statErr { + err = statErr + return + } + f, openErr := os.Open(p) + if nil != openErr { + err = openErr + return + } + hash, hashErr := util.GetEtagByHandle(f, fi.Size()) + if nil != hashErr { + f.Close() return } - var data []byte - data, err = io.ReadAll(f) - f.Close() - if nil != err { - return - } - - hash := fmt.Sprintf("%x", sha256.Sum256(data)) if existAsset := sql.QueryAssetByHash(hash); nil != existAsset { // 已经存在同样数据的资源文件的话不重复保存 succMap[baseName] = existAsset.Path @@ -80,9 +80,15 @@ func InsertLocalAssets(id string, assetPaths []string) (succMap map[string]inter fName = fName[0 : len(fName)-len(ext)] fName = fName + "-" + ast.NewNodeID() + ext writePath := filepath.Join(assets, fName) - if err = gulu.File.WriteFileSafer(writePath, data, 0644); nil != err { + if _, err = f.Seek(0, io.SeekStart); nil != err { + f.Close() return } + if err = gulu.File.WriteFileSaferByReader(writePath, f, 0644); nil != err { + f.Close() + return + } + f.Close() succMap[baseName] = "assets/" + fName } } @@ -134,22 +140,21 @@ func Upload(c *gin.Context) { ext = strings.ToLower(ext) fName += ext baseName := fName - f, err := file.Open() - if nil != err { + f, openErr := file.Open() + if nil != openErr { errFiles = append(errFiles, fName) - ret.Msg = err.Error() + ret.Msg = openErr.Error() break } - data, err := io.ReadAll(f) - if nil != err { + hash, hashErr := util.GetEtagByHandle(f, file.Size) + if nil != hashErr { errFiles = append(errFiles, fName) ret.Msg = err.Error() + f.Close() break } - f.Close() - hash := fmt.Sprintf("%x", sha256.Sum256(data)) if existAsset := sql.QueryAssetByHash(hash); nil != existAsset { // 已经存在同样数据的资源文件的话不重复保存 succMap[baseName] = existAsset.Path @@ -168,11 +173,19 @@ func Upload(c *gin.Context) { } } writePath := filepath.Join(assetsDirPath, fName) - if err = gulu.File.WriteFileSafer(writePath, data, 0644); nil != err { + if _, err = f.Seek(0, io.SeekStart); nil != err { errFiles = 
append(errFiles, fName) ret.Msg = err.Error() + f.Close() break } + if err = gulu.File.WriteFileSaferByReader(writePath, f, 0644); nil != err { + errFiles = append(errFiles, fName) + ret.Msg = err.Error() + f.Close() + break + } + f.Close() succMap[baseName] = "assets/" + fName } } diff --git a/kernel/server/serve.go b/kernel/server/serve.go index cc21db53b..23dc07d91 100644 --- a/kernel/server/serve.go +++ b/kernel/server/serve.go @@ -44,7 +44,7 @@ var cookieStore = cookie.NewStore([]byte("ATN51UlxVq1Gcvdf")) func Serve(fastMode bool) { gin.SetMode(gin.ReleaseMode) ginServer := gin.New() - ginServer.MaxMultipartMemory = 1024 * 1024 * 1024 * 4 + ginServer.MaxMultipartMemory = 1024 * 1024 * 32 // 32MB ginServer.Use(gin.Recovery()) ginServer.Use(cors.Default()) ginServer.Use(gzip.Gzip(gzip.DefaultCompression, gzip.WithExcludedExtensions([]string{".pdf", ".mp3", ".wav", ".ogg", ".mov", ".weba", ".mkv", ".mp4", ".webm"}))) diff --git a/kernel/sql/aseet.go b/kernel/sql/aseet.go index 94cc274ca..1d5daadd9 100644 --- a/kernel/sql/aseet.go +++ b/kernel/sql/aseet.go @@ -20,7 +20,6 @@ import ( "crypto/sha256" "database/sql" "fmt" - "os" "path/filepath" "strings" @@ -71,12 +70,11 @@ func docTitleImgAsset(root *ast.Node) *Asset { } var hash string + var err error absPath := filepath.Join(util.DataDir, p) - if data, err := os.ReadFile(absPath); nil != err { + if hash, err = util.GetEtag(absPath); nil != err { util.LogErrorf("read asset [%s] data failed: %s", absPath, err) hash = fmt.Sprintf("%x", sha256.Sum256([]byte(gulu.Rand.String(7)))) - } else { - hash = fmt.Sprintf("%x", sha256.Sum256(data)) } name, _ := util.LastID(p) asset := &Asset{ diff --git a/kernel/sql/database.go b/kernel/sql/database.go index 327f45e65..b241b4294 100644 --- a/kernel/sql/database.go +++ b/kernel/sql/database.go @@ -18,10 +18,8 @@ package sql import ( "bytes" - "crypto/sha256" "database/sql" "errors" - "fmt" "os" "path/filepath" "regexp" @@ -522,12 +520,12 @@ func buildSpanFromNode(n *ast.Node, 
tree *parse.Tree, rootID, boxID, p string) ( } var hash string + var hashErr error if lp := assetLocalPath(dest, boxLocalPath, docDirLocalPath); "" != lp { if !gulu.File.IsDir(lp) { - if data, err := os.ReadFile(lp); nil != err { - util.LogErrorf("read asset [%s] data failed: %s", lp, err) - } else { - hash = fmt.Sprintf("%x", sha256.Sum256(data)) + hash, hashErr = util.GetEtag(lp) + if nil != hashErr { + util.LogErrorf("calc asset [%s] hash failed: %s", lp, hashErr) } } } @@ -597,11 +595,11 @@ func buildSpanFromNode(n *ast.Node, tree *parse.Tree, rootID, boxID, p string) ( dest := string(src) var hash string + var hashErr error if lp := assetLocalPath(dest, boxLocalPath, docDirLocalPath); "" != lp { - if data, err := os.ReadFile(lp); nil != err { - util.LogErrorf("read asset [%s] data failed: %s", lp, err) - } else { - hash = fmt.Sprintf("%x", sha256.Sum256(data)) + hash, hashErr = util.GetEtag(lp) + if nil != hashErr { + util.LogErrorf("calc asset [%s] hash failed: %s", lp, hashErr) } } diff --git a/kernel/util/etag.go b/kernel/util/etag.go new file mode 100644 index 000000000..f84241160 --- /dev/null +++ b/kernel/util/etag.go @@ -0,0 +1,88 @@ +// SiYuan - Build Your Eternal Digital Garden +// Copyright (c) 2020-present, b3log.org +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ +package util + +import ( + "bytes" + "crypto/sha1" + "encoding/base64" + "io" + "os" +) + +// 以下是七牛云 Hash 算法实现 https://github.com/qiniu/qetag/blob/master/qetag.go + +func GetEtagByHandle(f io.Reader, size int64) (etag string, err error) { + blockCnt := BlockCount(size) + sha1Buf := make([]byte, 0, 21) + + if blockCnt <= 1 { // file size <= 4M + sha1Buf = append(sha1Buf, 0x16) + sha1Buf, err = CalSha1(sha1Buf, f) + if err != nil { + return + } + } else { // file size > 4M + sha1Buf = append(sha1Buf, 0x96) + sha1BlockBuf := make([]byte, 0, blockCnt*20) + for i := 0; i < blockCnt; i++ { + body := io.LimitReader(f, BLOCK_SIZE) + sha1BlockBuf, err = CalSha1(sha1BlockBuf, body) + if err != nil { + return + } + } + sha1Buf, _ = CalSha1(sha1Buf, bytes.NewReader(sha1BlockBuf)) + } + etag = base64.URLEncoding.EncodeToString(sha1Buf) + return +} + +func GetEtag(filename string) (etag string, err error) { + f, err := os.Open(filename) + if err != nil { + return + } + defer f.Close() + + fi, err := f.Stat() + if err != nil { + return + } + + etag, err = GetEtagByHandle(f, fi.Size()) + return +} + +const ( + BLOCK_BITS = 22 // Indicate that the blocksize is 4M + BLOCK_SIZE = 1 << BLOCK_BITS +) + +func BlockCount(fsize int64) int { + return int((fsize + (BLOCK_SIZE - 1)) >> BLOCK_BITS) +} + +func CalSha1(b []byte, r io.Reader) ([]byte, error) { + + h := sha1.New() + _, err := io.Copy(h, r) + if err != nil { + return nil, err + } + return h.Sum(b), nil +}