Mercurial > hg > Members > anatofuz > lectable
changeset 14:bac5eb544d4d
fix
author | anatofuz <anatofuz@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 01 Apr 2020 19:10:30 +0900 |
parents | 989cfda07d71 |
children | 8313f8fd6a44 |
files | 2020/previous/dumpFromPDF.json cmd_donwload.go cmd_pdf.go go.mod go.sum pdf.go syllabus/getSyllabus.go |
diffstat | 7 files changed, 120 insertions(+), 37 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/2020/previous/dumpFromPDF.json Wed Apr 01 19:10:30 2020 +0900 @@ -0,0 +1,1 @@ +[{"Id":"101501081","IsSelect":false,"Place":"共3-205","Day":"月2","Grades":[1]},{"Id":"101501081","IsSelect":false,"Place":"共3-403","Day":"木2","Grades":[1]},{"Id":"101501091","IsSelect":false,"Place":"共3-403","Day":"月2","Grades":[1]},{"Id":"101501091","IsSelect":false,"Place":"共3-201","Day":"木2","Grades":[1]},{"Id":"101526071","IsSelect":true,"Place":"共3-102","Day":"","Grades":[4]},{"Id":"101526081","IsSelect":true,"Place":"共3-405","Day":"","Grades":[4]},{"Id":"101526091","IsSelect":true,"Place":"共3-202","Day":"","Grades":[4]},{"Id":"100826081","IsSelect":true,"Place":"共1-217","Day":"","Grades":[1]},{"Id":"100408031","IsSelect":false,"Place":"共1-118","Day":"","Grades":[4]},{"Id":"610000071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610002071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610004071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610005071","IsSelect":true,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610012071","IsSelect":false,"Place":"工1-221","Day":"","Grades":[1]},{"Id":"610013071","IsSelect":false,"Place":"工1-322","Day":"","Grades":[3]},{"Id":"610019001","IsSelect":true,"Place":"工1-322","Day":"","Grades":[3]},{"Id":"610042001","IsSelect":true,"Place":"工1-509","Day":"","Grades":[4]},{"Id":"617000001","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"617003011","IsSelect":false,"Place":"地創棟508","Day":"","Grades":[2]},{"Id":"617003021","IsSelect":false,"Place":"地創棟508","Day":"","Grades":[2]},{"Id":"617005001","IsSelect":true,"Place":"工1-322","Day":"","Grades":[2]},{"Id":"617006001","IsSelect":false,"Place":"工1-322","Day":"","Grades":[2]},{"Id":"617007001","IsSelect":false,"Place":"工1-321","Day":"","Grades":[2]},{"Id":"617008001","IsSelect":false,"Place":"工1-321","Day":"","Grades":[2]},{"Id":"617009001","IsSelect":false,"Place":"工1-321","Day":"","Grades":[2]},{"Id":"617015001","IsSelect":true,"Place":"工1-322","Day":"","Grades":[2]},{"Id":"617016001","IsSelect":true,"Place":"工1-322","Day":"","Grades":[2]},{"Id":"617021001","IsSelect":false,"Place":"地創棟508","Day":"","Grades":[3]},{"Id":"617022001","IsSelect":true,"Place":"工1-321","Day":"","Grades":[3]},{"Id":"617023001","IsSelect":true,"Place":"工1-321","Day":"","Grades":[3]},{"Id":"617028001","IsSelect":true,"Place":"工2-313","Day":"","Grades":[3]},{"Id":"617038001","IsSelect":true,"Place":"工1-321","Day":"","Grades":[3]},{"Id":"617039001","IsSelect":true,"Place":"共4-305","Day":"","Grades":[4]},{"Id":"610000071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610001011","IsSelect":false,"Place":"工1-221","Day":"","Grades":[1]},{"Id":"610001021","IsSelect":false,"Place":"工1-222","Day":"","Grades":[1]},{"Id":"610001031","IsSelect":false,"Place":"工1-221","Day":"","Grades":[1]},{"Id":"610001041","IsSelect":false,"Place":"工1-222","Day":"","Grades":[1]},{"Id":"610002021","IsSelect":true,"Place":"工4-111","Day":"","Grades":[1]},{"Id":"610002071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610004071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610005071","IsSelect":true,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610012071","IsSelect":false,"Place":"工1-221","Day":"","Grades":[1]},{"Id":"610013071","IsSelect":false,"Place":"工1-322","Day":"","Grades":[3]},{"Id":"610016001","IsSelect":true,"Place":"工4-111","Day":"","Grades":[3]},{"Id":"610019001","IsSelect":true,"Place":"工1-322","Day":"","Grades":[3]},{"Id":"610038001","IsSelect":true,"Place":"共4-505","Day":"","Grades":[4]},{"Id":"610042001","IsSelect":true,"Place":"工1-509","Day":"","Grades":[4]}] \ No newline at end of file
--- a/cmd_donwload.go Wed Apr 01 14:08:32 2020 +0900 +++ b/cmd_donwload.go Wed Apr 01 19:10:30 2020 +0900 @@ -21,7 +21,7 @@ func (cd *cmdDownload) run(ctx context.Context, argv []string, outStream, errStream io.Writer) error { fmt.Println("download now!!") dh := syllabus.CreateGetSyllabus() - _, err := dh.CheckAndMkdirBuilddir() + err := checkAndMkdirBuilddir(dh.Outputdir) if err != nil { return err }
--- a/cmd_pdf.go Wed Apr 01 14:08:32 2020 +0900 +++ b/cmd_pdf.go Wed Apr 01 19:10:30 2020 +0900 @@ -1,9 +1,13 @@ package lectable import ( + "bufio" "context" + "encoding/json" "fmt" "io" + "os" + "path/filepath" ) type cmdPDF struct{} @@ -17,7 +21,16 @@ } func (cd *cmdPDF) run(ctx context.Context, argv []string, outStream, errStream io.Writer) error { - ppsr, _ := convertStringFromPDF(&argv) + outputDir := guessOutputDir() + err := checkAndMkdirBuilddir(outputDir) + if err != nil { + return err + } + + ppsr, err := convertStringFromPDF(argv) + if err != nil { + return err + } var fpfs []*lectureFPDF for _, pps := range *ppsr { for _, pp := range pps { @@ -29,8 +42,43 @@ } } + var dlfps []DumpLectureFPDF + for _, h := range fpfs { - fmt.Println(*h) + dlfps = append(dlfps, DumpLectureFPDF{ + Id: h.id, + IsSelect: h.isSelect, + Place: h.place, + Grades: h.grades, + Day: h.day, + }) } + fmt.Println(dlfps) + dumpJson(dlfps, outputDir) return nil } + +type DumpLectureFPDF struct { + Id string `json:id` + IsSelect bool `json:isSelect` + Place string `json:place` + Day string `json:day` + Grades []int `json:grades` +} + +func dumpJson(dlfp []DumpLectureFPDF, outputdir string) error { + bytes, err := json.Marshal(dlfp) + if err != nil { + return err + } + fp := filepath.Join(outputdir, "dumpFromPDF.json") + file, err := os.Create(fp) + bw := bufio.NewWriter(file) + _, err = bw.Write(bytes) + if err != nil { + return err + } + bw.Flush() + file.Close() + return nil +}
--- a/go.mod Wed Apr 01 14:08:32 2020 +0900 +++ b/go.mod Wed Apr 01 19:10:30 2020 +0900 @@ -5,6 +5,7 @@ require ( github.com/ledongthuc/pdf v0.0.0-20200323191019-23c5852adbd2 github.com/pkg/errors v0.9.1 + github.com/yuin/charsetutil v1.0.0 golang.org/x/text v0.3.2 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 )
--- a/go.sum Wed Apr 01 14:08:32 2020 +0900 +++ b/go.sum Wed Apr 01 19:10:30 2020 +0900 @@ -1,9 +1,17 @@ github.com/dcu/pdf v0.0.0-20190612170416-c8b299d05f89 h1:h8vPbGmLBQY15p63w4I9dGQWc6YtR3p+uG1j/T+GrF4= github.com/dcu/pdf v0.0.0-20190612170416-c8b299d05f89/go.mod h1:Y73szhmilZ/gaudo99AA26HG9ldrKYcVIyKBne65nMQ= +github.com/gogs/chardet v0.0.0-20150115103509-2404f7772561 h1:aBzukfDxQlCTVS0NBUjI5YA3iVeaZ9Tb5PxNrrIP1xs= +github.com/gogs/chardet v0.0.0-20150115103509-2404f7772561/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14= github.com/ledongthuc/pdf v0.0.0-20200323191019-23c5852adbd2 h1:H9HhyvygtvWnn1R8ymra4vdIUOvDDlaPlX6mjoJ9UTY= github.com/ledongthuc/pdf v0.0.0-20200323191019-23c5852adbd2/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= +github.com/yuin/charsetutil v1.0.0 h1:yMFDHL1cp9PUuwQHIzSrscOggJ0lStCkVqodXs57NKY= +github.com/yuin/charsetutil v1.0.0/go.mod h1:l9Fjvlj42gWS8XJ4Ht2KdYL/2qduX/KsQHueBPLjAns= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e h1:bRhVy7zSSasaqNksaRZiA5EEI+Ei4I1nO5Jh72wfHlg= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
--- a/pdf.go Wed Apr 01 14:08:32 2020 +0900 +++ b/pdf.go Wed Apr 01 19:10:30 2020 +0900 @@ -24,9 +24,9 @@ type pdfPagesStr []string -func convertStringFromPDF(pdfPaths *[]string) (*[]pdfPagesStr, error) { +func convertStringFromPDF(pdfPaths []string) (*[]pdfPagesStr, error) { var ppsr []pdfPagesStr - for _, apdf := range *pdfPaths { + for _, apdf := range pdfPaths { file, reader, err := pdf.Open(apdf) defer file.Close() if err != nil { @@ -41,9 +41,8 @@ buf.ReadFrom(preader) planString := string(norm.NFKC.Bytes(buf.Bytes())) - planstrings := strings.Split(planString, " ") // " " is next pdf page - planstrings = planstrings[1:] // 最初はゴミ + //planstrings = planstrings[1:] // 最初はゴミ ppsr = append(ppsr, planstrings) } return &ppsr, nil @@ -113,7 +112,6 @@ startDay = strings.LastIndex(s, day) } if startDay < lastIndex { - fmt.Println(s[startDay:lastIndex]) break } } @@ -187,10 +185,11 @@ } if strings.Contains(lecturefpd.body, "大学英語") && !strings.Contains(lecturefpd.body, "大学英語を") { - _, err := parseUniversityEnglish(lecturefpd.body) + lecs, err := parseUniversityEnglish(lecturefpd.body, lecturefpd.id) if err != nil { return nil, errors.Wrap(err, "failed parse univ english") } + lecturefpdfs = append(lecturefpdfs, lecs...) continue } @@ -217,33 +216,71 @@ return lecturefpdfs, nil } -func parseUniversityEnglish(s string) ([]*lectureFPDF, error) { +func parseUniversityEnglish(s, id string) ([]*lectureFPDF, error) { /* 外101大学英語42―2非富里 明美8421~4月木2共3-205(月)共3-403(木)半年知能情報(42) 外101大学英語42―2非宮城 和文9401~4月木2共3-403(月)共3-201(木)半年知能情報(20) */ type uestruct struct { - index int - kanji string + index int + kanji string + kanjiIndex int } var uess []uestruct - for _, day := range days { + for i, day := range days { if startDay := strings.Index(s, day); startDay != -1 { ues := uestruct{ - index: startDay, - kanji: day, + index: startDay, + kanji: day, + kanjiIndex: i, } uess = append(uess, ues) } } - _, err := strconv.Atoi(s[uess[1].index+kanjiDayLen : uess[1].index+kanjiDayLen+1]) - if err != nil { - return nil, errors.Wrap(err, "failed parse int at univ english") - } + var lecs []*lectureFPDF + + kyouKaji := "共" + leftParen := "(" + kindex := strings.Index(s, kyouKaji) + leftParenIndex := strings.Index(s, leftParen) + + var strBuilder strings.Builder + strBuilder.WriteString(uess[0].kanji) + strBuilder.WriteString(s[uess[1].index+kanjiDayLen : uess[1].index+kanjiDayLen+1]) + + lecs = append(lecs, &lectureFPDF{ + id: id, + isSelect: false, + body: s, + place: s[kindex:leftParenIndex], + grades: []int{1}, + day: strBuilder.String(), + }) + + strBuilder.Reset() - return nil, nil + kindex = strings.LastIndex(s, kyouKaji) + + strBuilder.WriteString(leftParen) + strBuilder.WriteString(uess[1].kanji) + leftParenIndex = strings.LastIndex(s, strBuilder.String()) + + strBuilder.Reset() + strBuilder.WriteString(uess[1].kanji) + strBuilder.WriteString(s[uess[1].index+kanjiDayLen : uess[1].index+kanjiDayLen+1]) + + lecs = append(lecs, &lectureFPDF{ + id: id, + isSelect: false, + body: s, + place: s[kindex:leftParenIndex], + grades: []int{1}, + day: strBuilder.String(), + }) + + return lecs, nil }
--- a/syllabus/getSyllabus.go Wed Apr 01 14:08:32 2020 +0900 +++ b/syllabus/getSyllabus.go Wed Apr 01 19:10:30 2020 +0900 @@ -30,7 +30,7 @@ type GetSyllabus struct { year int term string - outputdir string + Outputdir string } // LectureDay include day of week (0~4, error -> 5), period, lastpriod (1~6) @@ -66,7 +66,7 @@ } gs.term = "previous" - gs.outputdir = filepath.Join(strconv.Itoa(gs.year), gs.term) + gs.Outputdir = filepath.Join(strconv.Itoa(gs.year), gs.term) return &gs } @@ -79,18 +79,6 @@ //"https://tiglon.jim.u-ryukyu.ac.jp/portal/Public/Syllabus/SyllabusSearchStart.aspx?lct_year=2019&lct_cd=610004071&je_cd=1" var endpoint = "https://tiglon.jim.u-ryukyu.ac.jp" -//CheckAndMkdirBuilddir is builld 2019/early dir -func (g *GetSyllabus) CheckAndMkdirBuilddir() (bool, error) { - if f, err := os.Stat(g.outputdir); os.IsNotExist(err) || !f.IsDir() { - err := os.MkdirAll(g.outputdir, 0755) - if err != nil { - return false, errors.Wrap(err, "failed mkdir") - } - return true, nil - } - return true, nil -} - func (g *GetSyllabus) LecIDStoDonwlodSyllabus(ctx context.Context, lectureIDs []string, outStream io.Writer) (*[]LectureWPath, error) { //var wg sync.WaitGroup ch := make(chan LectureWPath, len(lectureIDs)) @@ -120,7 +108,7 @@ strBuilder.WriteString(lectureID) strBuilder.WriteString(".html") - outputPath := filepath.Join(g.outputdir, strBuilder.String()) + outputPath := filepath.Join(g.Outputdir, strBuilder.String()) if _, err := os.Stat(outputPath); err == nil { fmt.Fprintf(outStream, "already download %s.html\n", lectureID) @@ -269,7 +257,7 @@ if err != nil { return err } - fp := filepath.Join(g.outputdir, "dump_lectures.json") + fp := filepath.Join(g.Outputdir, "dump_lectures.json") file, err := os.Create(fp) bw := bufio.NewWriter(file) _, err = bw.Write(bytes) @@ -286,7 +274,7 @@ if err != nil { return err } - fp := filepath.Join(g.outputdir, "dump.json") + fp := filepath.Join(g.Outputdir, "dump.json") file, err := os.Create(fp) bw := bufio.NewWriter(file) _, err = bw.Write(bytes)