changeset 14:bac5eb544d4d

fix
author anatofuz <anatofuz@cr.ie.u-ryukyu.ac.jp>
date Wed, 01 Apr 2020 19:10:30 +0900
parents 989cfda07d71
children 8313f8fd6a44
files 2020/previous/dumpFromPDF.json cmd_donwload.go cmd_pdf.go go.mod go.sum pdf.go syllabus/getSyllabus.go
diffstat 7 files changed, 120 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/2020/previous/dumpFromPDF.json	Wed Apr 01 19:10:30 2020 +0900
@@ -0,0 +1,1 @@
+[{"Id":"101501081","IsSelect":false,"Place":"共3-205","Day":"月2","Grades":[1]},{"Id":"101501081","IsSelect":false,"Place":"共3-403","Day":"木2","Grades":[1]},{"Id":"101501091","IsSelect":false,"Place":"共3-403","Day":"月2","Grades":[1]},{"Id":"101501091","IsSelect":false,"Place":"共3-201","Day":"木2","Grades":[1]},{"Id":"101526071","IsSelect":true,"Place":"共3-102","Day":"","Grades":[4]},{"Id":"101526081","IsSelect":true,"Place":"共3-405","Day":"","Grades":[4]},{"Id":"101526091","IsSelect":true,"Place":"共3-202","Day":"","Grades":[4]},{"Id":"100826081","IsSelect":true,"Place":"共1-217","Day":"","Grades":[1]},{"Id":"100408031","IsSelect":false,"Place":"共1-118","Day":"","Grades":[4]},{"Id":"610000071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610002071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610004071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610005071","IsSelect":true,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610012071","IsSelect":false,"Place":"工1-221","Day":"","Grades":[1]},{"Id":"610013071","IsSelect":false,"Place":"工1-322","Day":"","Grades":[3]},{"Id":"610019001","IsSelect":true,"Place":"工1-322","Day":"","Grades":[3]},{"Id":"610042001","IsSelect":true,"Place":"工1-509","Day":"","Grades":[4]},{"Id":"617000001","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"617003011","IsSelect":false,"Place":"地創棟508","Day":"","Grades":[2]},{"Id":"617003021","IsSelect":false,"Place":"地創棟508","Day":"","Grades":[2]},{"Id":"617005001","IsSelect":true,"Place":"工1-322","Day":"","Grades":[2]},{"Id":"617006001","IsSelect":false,"Place":"工1-322","Day":"","Grades":[2]},{"Id":"617007001","IsSelect":false,"Place":"工1-321","Day":"","Grades":[2]},{"Id":"617008001","IsSelect":false,"Place":"工1-321","Day":"","Grades":[2]},{"Id":"617009001","IsSelect":false,"Place":"工1-321","Day":"","Grades":[2]},{"Id":"617015001","IsSelect":true,"Place":"工1-322","Day":"","Grades":[2]},{"Id":"617016001","IsSelect":true,"Place":"工1-322
","Day":"","Grades":[2]},{"Id":"617021001","IsSelect":false,"Place":"地創棟508","Day":"","Grades":[3]},{"Id":"617022001","IsSelect":true,"Place":"工1-321","Day":"","Grades":[3]},{"Id":"617023001","IsSelect":true,"Place":"工1-321","Day":"","Grades":[3]},{"Id":"617028001","IsSelect":true,"Place":"工2-313","Day":"","Grades":[3]},{"Id":"617038001","IsSelect":true,"Place":"工1-321","Day":"","Grades":[3]},{"Id":"617039001","IsSelect":true,"Place":"共4-305","Day":"","Grades":[4]},{"Id":"610000071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610001011","IsSelect":false,"Place":"工1-221","Day":"","Grades":[1]},{"Id":"610001021","IsSelect":false,"Place":"工1-222","Day":"","Grades":[1]},{"Id":"610001031","IsSelect":false,"Place":"工1-221","Day":"","Grades":[1]},{"Id":"610001041","IsSelect":false,"Place":"工1-222","Day":"","Grades":[1]},{"Id":"610002021","IsSelect":true,"Place":"工4-111","Day":"","Grades":[1]},{"Id":"610002071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610004071","IsSelect":false,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610005071","IsSelect":true,"Place":"工1-321","Day":"","Grades":[1]},{"Id":"610012071","IsSelect":false,"Place":"工1-221","Day":"","Grades":[1]},{"Id":"610013071","IsSelect":false,"Place":"工1-322","Day":"","Grades":[3]},{"Id":"610016001","IsSelect":true,"Place":"工4-111","Day":"","Grades":[3]},{"Id":"610019001","IsSelect":true,"Place":"工1-322","Day":"","Grades":[3]},{"Id":"610038001","IsSelect":true,"Place":"共4-505","Day":"","Grades":[4]},{"Id":"610042001","IsSelect":true,"Place":"工1-509","Day":"","Grades":[4]}]
\ No newline at end of file
--- a/cmd_donwload.go	Wed Apr 01 14:08:32 2020 +0900
+++ b/cmd_donwload.go	Wed Apr 01 19:10:30 2020 +0900
@@ -21,7 +21,7 @@
 func (cd *cmdDownload) run(ctx context.Context, argv []string, outStream, errStream io.Writer) error {
 	fmt.Println("download now!!")
 	dh := syllabus.CreateGetSyllabus()
-	_, err := dh.CheckAndMkdirBuilddir()
+	err := checkAndMkdirBuilddir(dh.Outputdir)
 	if err != nil {
 		return err
 	}
--- a/cmd_pdf.go	Wed Apr 01 14:08:32 2020 +0900
+++ b/cmd_pdf.go	Wed Apr 01 19:10:30 2020 +0900
@@ -1,9 +1,13 @@
 package lectable
 
 import (
+	"bufio"
 	"context"
+	"encoding/json"
 	"fmt"
 	"io"
+	"os"
+	"path/filepath"
 )
 
 type cmdPDF struct{}
@@ -17,7 +21,16 @@
 }
 
 func (cd *cmdPDF) run(ctx context.Context, argv []string, outStream, errStream io.Writer) error {
-	ppsr, _ := convertStringFromPDF(&argv)
+	outputDir := guessOutputDir()
+	err := checkAndMkdirBuilddir(outputDir)
+	if err != nil {
+		return err
+	}
+
+	ppsr, err := convertStringFromPDF(argv)
+	if err != nil {
+		return err
+	}
 	var fpfs []*lectureFPDF
 	for _, pps := range *ppsr {
 		for _, pp := range pps {
@@ -29,8 +42,43 @@
 		}
 	}
 
+	var dlfps []DumpLectureFPDF
+
 	for _, h := range fpfs {
-		fmt.Println(*h)
+		dlfps = append(dlfps, DumpLectureFPDF{
+			Id:       h.id,
+			IsSelect: h.isSelect,
+			Place:    h.place,
+			Grades:   h.grades,
+			Day:      h.day,
+		})
 	}
+	fmt.Println(dlfps)
+	dumpJson(dlfps, outputDir)
 	return nil
 }
+
+// DumpLectureFPDF is the JSON-serialisable copy of a lectureFPDF entry that
+// cmdPDF.run collects and hands to dumpJson for writing dumpFromPDF.json.
+//
+// The struct tags use the `json:"name"` form that encoding/json parses; a tag
+// written without the quotes is silently ignored. The keys are deliberately
+// identical to the Go field names so the emitted document keeps the key
+// spelling (Id, IsSelect, Place, Day, Grades) of the dumps generated so far.
+type DumpLectureFPDF struct {
+	Id       string `json:"Id"`
+	IsSelect bool   `json:"IsSelect"`
+	Place    string `json:"Place"`
+	Day      string `json:"Day"`
+	Grades   []int  `json:"Grades"`
+}
+
+// dumpJson marshals dlfp to JSON and writes the result to
+// <outputdir>/dumpFromPDF.json, creating the file or truncating an existing
+// one.
+//
+// It returns the first error encountered while marshalling, creating the
+// file, writing, flushing the buffered writer, or closing the file.
+func dumpJson(dlfp []DumpLectureFPDF, outputdir string) (err error) {
+	data, err := json.Marshal(dlfp)
+	if err != nil {
+		return err
+	}
+
+	fp := filepath.Join(outputdir, "dumpFromPDF.json")
+	file, err := os.Create(fp)
+	if err != nil {
+		return err
+	}
+	// Close on every path, but report a close failure only when nothing
+	// failed earlier so the original cause is not masked.
+	defer func() {
+		if cerr := file.Close(); err == nil {
+			err = cerr
+		}
+	}()
+
+	bw := bufio.NewWriter(file)
+	if _, err = bw.Write(data); err != nil {
+		return err
+	}
+	// Flush can be the first point where a write error surfaces, because
+	// small payloads stay in the bufio buffer until now.
+	return bw.Flush()
+}
--- a/go.mod	Wed Apr 01 14:08:32 2020 +0900
+++ b/go.mod	Wed Apr 01 19:10:30 2020 +0900
@@ -5,6 +5,7 @@
 require (
 	github.com/ledongthuc/pdf v0.0.0-20200323191019-23c5852adbd2
 	github.com/pkg/errors v0.9.1
+	github.com/yuin/charsetutil v1.0.0
 	golang.org/x/text v0.3.2
 	golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543
 )
--- a/go.sum	Wed Apr 01 14:08:32 2020 +0900
+++ b/go.sum	Wed Apr 01 19:10:30 2020 +0900
@@ -1,9 +1,17 @@
 github.com/dcu/pdf v0.0.0-20190612170416-c8b299d05f89 h1:h8vPbGmLBQY15p63w4I9dGQWc6YtR3p+uG1j/T+GrF4=
 github.com/dcu/pdf v0.0.0-20190612170416-c8b299d05f89/go.mod h1:Y73szhmilZ/gaudo99AA26HG9ldrKYcVIyKBne65nMQ=
+github.com/gogs/chardet v0.0.0-20150115103509-2404f7772561 h1:aBzukfDxQlCTVS0NBUjI5YA3iVeaZ9Tb5PxNrrIP1xs=
+github.com/gogs/chardet v0.0.0-20150115103509-2404f7772561/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14=
 github.com/ledongthuc/pdf v0.0.0-20200323191019-23c5852adbd2 h1:H9HhyvygtvWnn1R8ymra4vdIUOvDDlaPlX6mjoJ9UTY=
 github.com/ledongthuc/pdf v0.0.0-20200323191019-23c5852adbd2/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
+github.com/yuin/charsetutil v1.0.0 h1:yMFDHL1cp9PUuwQHIzSrscOggJ0lStCkVqodXs57NKY=
+github.com/yuin/charsetutil v1.0.0/go.mod h1:l9Fjvlj42gWS8XJ4Ht2KdYL/2qduX/KsQHueBPLjAns=
+golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e h1:bRhVy7zSSasaqNksaRZiA5EEI+Ei4I1nO5Jh72wfHlg=
+golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
--- a/pdf.go	Wed Apr 01 14:08:32 2020 +0900
+++ b/pdf.go	Wed Apr 01 19:10:30 2020 +0900
@@ -24,9 +24,9 @@
 
 type pdfPagesStr []string
 
-func convertStringFromPDF(pdfPaths *[]string) (*[]pdfPagesStr, error) {
+func convertStringFromPDF(pdfPaths []string) (*[]pdfPagesStr, error) {
 	var ppsr []pdfPagesStr
-	for _, apdf := range *pdfPaths {
+	for _, apdf := range pdfPaths {
 		file, reader, err := pdf.Open(apdf)
 		defer file.Close()
 		if err != nil {
@@ -41,9 +41,8 @@
 
 		buf.ReadFrom(preader)
 		planString := string(norm.NFKC.Bytes(buf.Bytes()))
-
 		planstrings := strings.Split(planString, "   ") // "   " is next pdf page
-		planstrings = planstrings[1:]                   // 最初はゴミ
+		//planstrings = planstrings[1:]                   // 最初はゴミ
 		ppsr = append(ppsr, planstrings)
 	}
 	return &ppsr, nil
@@ -113,7 +112,6 @@
 				startDay = strings.LastIndex(s, day)
 			}
 			if startDay < lastIndex {
-				fmt.Println(s[startDay:lastIndex])
 				break
 			}
 		}
@@ -187,10 +185,11 @@
 		}
 
 		if strings.Contains(lecturefpd.body, "大学英語") && !strings.Contains(lecturefpd.body, "大学英語を") {
-			_, err := parseUniversityEnglish(lecturefpd.body)
+			lecs, err := parseUniversityEnglish(lecturefpd.body, lecturefpd.id)
 			if err != nil {
 				return nil, errors.Wrap(err, "failed parse univ english")
 			}
+			lecturefpdfs = append(lecturefpdfs, lecs...)
 			continue
 		}
 
@@ -217,33 +216,71 @@
 	return lecturefpdfs, nil
 }
 
-func parseUniversityEnglish(s string) ([]*lectureFPDF, error) {
+// parseUniversityEnglish splits one "大学英語" (University English) entry,
+// which carries two days and two rooms on a single line, into two
+// lectureFPDF records.
+//
+// s is the raw entry text and id is the lecture id copied into both records.
+// Every entry of days that occurs in s is collected in the order of days; the
+// first two matches are used. Both records share the period, which is the
+// single byte following the second matched day (kanjiDayLen is presumably the
+// byte length of one day kanji -- confirm against its declaration):
+//
+//   - record 1: first matched day + period, place = text from the first "共"
+//     up to the first "(".
+//   - record 2: second matched day + period, place = text from the last "共"
+//     up to the last "(" that is followed by the second matched day.
+//
+// Both records are created with isSelect=false and grades=[1].
+//
+// An error is returned, instead of panicking on an out-of-range index, when
+// fewer than two days are found, when no byte follows the second day, or when
+// a room cannot be located.
+func parseUniversityEnglish(s, id string) ([]*lectureFPDF, error) {
 	/*
 		外101大学英語42―2非富里 明美8421~4月木2共3-205(月)共3-403(木)半年知能情報(42)
 		外101大学英語42―2非宮城 和文9401~4月木2共3-403(月)共3-201(木)半年知能情報(20)
 	*/
 
 	type uestruct struct {
-		index int
-		kanji string
+		index      int
+		kanji      string
+		kanjiIndex int
 	}
 
 	var uess []uestruct
 
-	for _, day := range days {
+	for i, day := range days {
 		if startDay := strings.Index(s, day); startDay != -1 {
 			ues := uestruct{
-				index: startDay,
-				kanji: day,
+				index:      startDay,
+				kanji:      day,
+				kanjiIndex: i,
 			}
 			uess = append(uess, ues)
 		}
 	}
 
-	_, err := strconv.Atoi(s[uess[1].index+kanjiDayLen : uess[1].index+kanjiDayLen+1])
-	if err != nil {
-		return nil, errors.Wrap(err, "failed parse int at univ english")
-	}
+	// Both records need two matched days; without this guard the
+	// uess[0]/uess[1] accesses below panic on an unexpected line.
+	if len(uess) < 2 {
+		return nil, errors.New("failed parse days at univ english")
+	}
+
+	// The period is the single byte that follows the second matched day.
+	periodStart := uess[1].index + kanjiDayLen
+	if periodStart >= len(s) {
+		return nil, errors.New("failed parse period at univ english")
+	}
+	period := s[periodStart : periodStart+1]
+
+	var lecs []*lectureFPDF
+
+	kyouKaji := "共"
+	leftParen := "("
+	kindex := strings.Index(s, kyouKaji)
+	leftParenIndex := strings.Index(s, leftParen)
+	// A missing "共" or a "(" located before it would make the slice
+	// expression below panic.
+	if kindex == -1 || leftParenIndex < kindex {
+		return nil, errors.New("failed parse first place at univ english")
+	}
+
+	var strBuilder strings.Builder
+	strBuilder.WriteString(uess[0].kanji)
+	strBuilder.WriteString(period)
+
+	lecs = append(lecs, &lectureFPDF{
+		id:       id,
+		isSelect: false,
+		body:     s,
+		place:    s[kindex:leftParenIndex],
+		grades:   []int{1},
+		day:      strBuilder.String(),
+	})
+
+	strBuilder.Reset()
 
-	return nil, nil
+	kindex = strings.LastIndex(s, kyouKaji)
+
+	// The second room ends at the last "(" that is followed by the second day.
+	strBuilder.WriteString(leftParen)
+	strBuilder.WriteString(uess[1].kanji)
+	leftParenIndex = strings.LastIndex(s, strBuilder.String())
+	if kindex == -1 || leftParenIndex < kindex {
+		return nil, errors.New("failed parse second place at univ english")
+	}
+
+	strBuilder.Reset()
+	strBuilder.WriteString(uess[1].kanji)
+	strBuilder.WriteString(period)
+
+	lecs = append(lecs, &lectureFPDF{
+		id:       id,
+		isSelect: false,
+		body:     s,
+		place:    s[kindex:leftParenIndex],
+		grades:   []int{1},
+		day:      strBuilder.String(),
+	})
+
+	return lecs, nil
 }
--- a/syllabus/getSyllabus.go	Wed Apr 01 14:08:32 2020 +0900
+++ b/syllabus/getSyllabus.go	Wed Apr 01 19:10:30 2020 +0900
@@ -30,7 +30,7 @@
 type GetSyllabus struct {
 	year      int
 	term      string
-	outputdir string
+	Outputdir string
 }
 
 // LectureDay include day of week  (0~4, error -> 5), period, lastpriod (1~6)
@@ -66,7 +66,7 @@
 	}
 	gs.term = "previous"
 
-	gs.outputdir = filepath.Join(strconv.Itoa(gs.year), gs.term)
+	gs.Outputdir = filepath.Join(strconv.Itoa(gs.year), gs.term)
 	return &gs
 }
 
@@ -79,18 +79,6 @@
 //"https://tiglon.jim.u-ryukyu.ac.jp/portal/Public/Syllabus/SyllabusSearchStart.aspx?lct_year=2019&lct_cd=610004071&je_cd=1"
 var endpoint = "https://tiglon.jim.u-ryukyu.ac.jp"
 
-//CheckAndMkdirBuilddir is builld 2019/early dir
-func (g *GetSyllabus) CheckAndMkdirBuilddir() (bool, error) {
-	if f, err := os.Stat(g.outputdir); os.IsNotExist(err) || !f.IsDir() {
-		err := os.MkdirAll(g.outputdir, 0755)
-		if err != nil {
-			return false, errors.Wrap(err, "failed mkdir")
-		}
-		return true, nil
-	}
-	return true, nil
-}
-
 func (g *GetSyllabus) LecIDStoDonwlodSyllabus(ctx context.Context, lectureIDs []string, outStream io.Writer) (*[]LectureWPath, error) {
 	//var wg sync.WaitGroup
 	ch := make(chan LectureWPath, len(lectureIDs))
@@ -120,7 +108,7 @@
 	strBuilder.WriteString(lectureID)
 	strBuilder.WriteString(".html")
 
-	outputPath := filepath.Join(g.outputdir, strBuilder.String())
+	outputPath := filepath.Join(g.Outputdir, strBuilder.String())
 
 	if _, err := os.Stat(outputPath); err == nil {
 		fmt.Fprintf(outStream, "already download %s.html\n", lectureID)
@@ -269,7 +257,7 @@
 	if err != nil {
 		return err
 	}
-	fp := filepath.Join(g.outputdir, "dump_lectures.json")
+	fp := filepath.Join(g.Outputdir, "dump_lectures.json")
 	file, err := os.Create(fp)
 	bw := bufio.NewWriter(file)
 	_, err = bw.Write(bytes)
@@ -286,7 +274,7 @@
 	if err != nil {
 		return err
 	}
-	fp := filepath.Join(g.outputdir, "dump.json")
+	fp := filepath.Join(g.Outputdir, "dump.json")
 	file, err := os.Create(fp)
 	bw := bufio.NewWriter(file)
 	_, err = bw.Write(bytes)