// select_object_read_file_test.go (13 KB)
  1. package oss
  2. import (
  3. "bufio"
  4. "io"
  5. "os"
  6. "strings"
  7. "strconv"
  8. "regexp"
  9. "encoding/json"
  10. "encoding/csv"
  11. )
  12. func handleError(err error) error {
  13. if err == nil {
  14. return nil
  15. }
  16. return err
  17. }
  18. func readCsvLine(fileName string) (int, error) {
  19. file, err := os.Open(fileName)
  20. if err != nil {
  21. return 0,err
  22. }
  23. defer file.Close()
  24. rd := csv.NewReader(file)
  25. rc, err := rd.ReadAll()
  26. return len(rc), err
  27. }
  28. func readCsvIsEmpty(fileName string) (string, error) {
  29. file, err := os.Open(fileName)
  30. if err != nil {
  31. return "",err
  32. }
  33. defer file.Close()
  34. var out string
  35. var i, index int
  36. var indexYear, indexStateAbbr, indexCityName, indexPopulationCount int
  37. rd := bufio.NewReader(file)
  38. for {
  39. line, err := rd.ReadString('\n') // read a line
  40. if io.EOF == err {
  41. break
  42. }
  43. if err != nil {
  44. return "",err
  45. }
  46. sptLint := strings.Split(line, ",")
  47. if i == 0 {
  48. i = 1
  49. for _, val := range sptLint {
  50. switch val {
  51. case "Year":
  52. indexYear = index
  53. case "StateAbbr":
  54. indexStateAbbr = index
  55. case "CityName":
  56. indexCityName = index
  57. case "PopulationCount":
  58. indexPopulationCount = index
  59. }
  60. index++
  61. }
  62. } else {
  63. if sptLint[indexCityName] != "" {
  64. outLine := sptLint[indexYear] + "," + sptLint[indexStateAbbr] + "," + sptLint[indexCityName] + "," + sptLint[indexPopulationCount] + "\n"
  65. out += outLine
  66. }
  67. }
  68. }
  69. return out, nil
  70. }
  71. func readCsvLike(fileName string) (string, error) {
  72. file, err := os.Open(fileName)
  73. if err != nil {
  74. return "",err
  75. }
  76. defer file.Close()
  77. var out string
  78. var i, index int
  79. var indexYear,indexStateAbbr,indexCityName,indexPopulationCount,indexMeasure int
  80. rd := bufio.NewReader(file)
  81. for {
  82. line, err := rd.ReadString('\n') // read a line
  83. if io.EOF == err {
  84. break
  85. }
  86. if err != nil {
  87. return "",err
  88. }
  89. //utf8Lint := ConvertToString(line,"gbk", "utf-8")
  90. sptLint := strings.Split(line[:(len(line)-1)], ",")
  91. if i == 0 {
  92. i = 1
  93. for _, val := range sptLint {
  94. switch val {
  95. case "Year":indexYear = index
  96. case "StateAbbr":indexStateAbbr = index
  97. case "CityName":indexCityName = index
  98. case "Short_Question_Text":indexPopulationCount = index
  99. case "Measure":indexMeasure = index
  100. }
  101. index++
  102. }
  103. } else {
  104. if sptLint[indexMeasure] != "" {
  105. reg := regexp.MustCompile("^.*blood pressure.*Years$")
  106. res := reg.FindAllString(sptLint[indexMeasure], -1)
  107. if len(res) > 0 {
  108. outLine := sptLint[indexYear] + "," +sptLint[indexStateAbbr] + "," +sptLint[indexCityName] + "," + sptLint[indexPopulationCount] + "\n"
  109. out += outLine
  110. }
  111. }
  112. }
  113. }
  114. return out, nil
  115. }
  116. func readCsvRange(fileName string, l int, r int) (string, error) {
  117. file, err := os.Open(fileName)
  118. if err != nil {
  119. return "", err
  120. }
  121. defer file.Close()
  122. var out string
  123. var i, index int
  124. var indexYear,indexStateAbbr,indexCityName,indexPopulationCount int
  125. rd := bufio.NewReader(file)
  126. for j := 0; j < r + 1; j++ {
  127. if j < l {
  128. continue
  129. }
  130. line, err := rd.ReadString('\n') // read a line
  131. if io.EOF == err {
  132. break
  133. }
  134. if err != nil {
  135. return "", err
  136. }
  137. sptLint := strings.Split(line[:(len(line)-1)], ",")
  138. if i == 0 {
  139. i = 1
  140. for _, val := range sptLint {
  141. switch val {
  142. case "Year":indexYear = index
  143. case "StateAbbr":indexStateAbbr = index
  144. case "CityName":indexCityName = index
  145. case "Short_Question_Text":indexPopulationCount = index
  146. }
  147. index++
  148. }
  149. } else {
  150. outLine := sptLint[indexYear] + "," +sptLint[indexStateAbbr] + "," +sptLint[indexCityName] + "," + sptLint[indexPopulationCount] + "\n"
  151. out += outLine
  152. }
  153. }
  154. return out, nil
  155. }
  156. func readCsvFloatAgg(fileName string) (avg, max, sum float64, er error) {
  157. file, err := os.Open(fileName)
  158. if err != nil {
  159. er = err
  160. return
  161. }
  162. defer file.Close()
  163. var i, index int
  164. var indexDataValue int
  165. rd := csv.NewReader(file)
  166. for {
  167. rc, err := rd.Read()
  168. if io.EOF == err {
  169. break
  170. }
  171. if err != nil {
  172. er = err
  173. return
  174. }
  175. if i == 0 {
  176. i=1
  177. for index = 0; index < len(rc); index++ {
  178. if rc[index] == "Data_Value" {
  179. indexDataValue = index
  180. }
  181. }
  182. } else {
  183. if rc[indexDataValue] != "" {
  184. s1, err := strconv.ParseFloat(rc[indexDataValue], 64)
  185. if err != nil {
  186. er = err
  187. return
  188. }
  189. sum +=s1
  190. if s1 > max {
  191. max = s1
  192. }
  193. i++
  194. }
  195. }
  196. }
  197. avg = sum / float64(i-1)
  198. return
  199. }
  200. func readCsvConcat(fileName string) (string, error) {
  201. var out string
  202. file, err := os.Open(fileName)
  203. if err != nil {
  204. return out, err
  205. }
  206. defer file.Close()
  207. var i int
  208. var indexDataValue int
  209. var indexYear,indexStateAbbr,indexCityName,indexShortQuestionText, indexDataValueUnit int
  210. rd := csv.NewReader(file)
  211. for {
  212. rc, err := rd.Read()
  213. if io.EOF == err {
  214. break
  215. }
  216. if err != nil {
  217. return out, err
  218. }
  219. if i == 0 {
  220. for j, v := range rc {
  221. switch v {
  222. case "Year":indexYear = j
  223. case "StateAbbr":indexStateAbbr = j
  224. case "CityName":indexCityName = j
  225. case "Short_Question_Text":indexShortQuestionText = j
  226. case "Data_Value_Unit":indexDataValueUnit = j
  227. case "Data_Value":indexDataValue = j
  228. }
  229. }
  230. } else {
  231. i++
  232. if rc[indexDataValue] != "" || rc[indexDataValueUnit] != "" {
  233. reg := regexp.MustCompile("^14.8.*$")
  234. reD := reg.FindAllString(rc[indexDataValue], -1)
  235. reDU := reg.FindAllString(rc[indexDataValueUnit], -1)
  236. if len(reD) > 0 || len(reDU) > 0 {
  237. outLine := rc[indexYear] + "," +rc[indexStateAbbr] + "," +rc[indexCityName] + "," + rc[indexShortQuestionText] + "\n"
  238. out += outLine
  239. }
  240. }
  241. }
  242. i++
  243. }
  244. return out, nil
  245. }
  246. func readCsvComplicateCondition(fileName string)(string, error) {
  247. var out string
  248. file, err := os.Open(fileName)
  249. if err != nil {
  250. return out, err
  251. }
  252. defer file.Close()
  253. var i int
  254. var indexDataValue, indexCategory, indexHighConfidenceLimit, indexMeasure int
  255. var indexYear,indexStateAbbr,indexCityName,indexShortQuestionText, indexDataValueUnit int
  256. rd := csv.NewReader(file)
  257. for {
  258. rc, err := rd.Read()
  259. if io.EOF == err {
  260. break
  261. }
  262. if err != nil {
  263. return out, err
  264. }
  265. if i == 0 {
  266. for j, v := range rc {
  267. switch v {
  268. case "Year":indexYear = j
  269. case "StateAbbr":indexStateAbbr = j
  270. case "CityName":indexCityName = j
  271. case "Short_Question_Text":indexShortQuestionText = j
  272. case "Data_Value_Unit":indexDataValueUnit = j
  273. case "Data_Value":indexDataValue = j
  274. case "Measure":indexMeasure = j
  275. case "Category":indexCategory = j
  276. case "High_Confidence_Limit":indexHighConfidenceLimit = j
  277. }
  278. }
  279. } else {
  280. reg := regexp.MustCompile("^.*18 Years$")
  281. reM := reg.FindAllString(rc[indexMeasure], -1)
  282. var dataV, limitV float64
  283. if rc[indexDataValue] != "" {
  284. dataV, err = strconv.ParseFloat(rc[indexDataValue], 64)
  285. if err != nil {
  286. return out, err
  287. }
  288. }
  289. if rc[indexHighConfidenceLimit] != "" {
  290. limitV, err = strconv.ParseFloat(rc[indexHighConfidenceLimit], 64)
  291. if err != nil {
  292. return out, err
  293. }
  294. }
  295. if dataV > 14.8 && rc[indexDataValueUnit] == "%" || len(reM) > 0 &&
  296. rc[indexCategory] == "Unhealthy Behaviors" || limitV > 70.0 {
  297. outLine := rc[indexYear] + "," +rc[indexStateAbbr] + "," +rc[indexCityName] + "," + rc[indexShortQuestionText] + "," + rc[indexDataValue] + "," + rc[indexDataValueUnit] + "," + rc[indexCategory] + "," + rc[indexHighConfidenceLimit] + "\n"
  298. out += outLine
  299. }
  300. }
  301. i++
  302. }
  303. return out, nil
  304. }
  305. type Extra struct {
  306. Address string `json:"address"`
  307. ContactForm string `json:"contact_form"`
  308. Fax string `json:"fax,omitempty"`
  309. How string `json:"how,omitempty"`
  310. Office string `json:"office"`
  311. RssUrl string `json:"rss_url,omitempty"`
  312. }
  313. type Person struct {
  314. Bioguideid string `json:"bioguideid"`
  315. Birthday string `json:"birthday"`
  316. Cspanid int `json:"cspanid"`
  317. Firstname string `json:"firstname"`
  318. Gender string `json:"gender"`
  319. GenderLabel string `json:"gender_label"`
  320. Lastname string `json:"lastname"`
  321. Link string `json:"link"`
  322. Middlename string `json:"middlename"`
  323. Name string `json:"name"`
  324. Namemod string `json:"namemod"`
  325. Nickname string `json:"nickname"`
  326. Osid string `json:"osid"`
  327. Pvsid *string `json:"pvsid"`
  328. Sortname string `json:"sortname"`
  329. Twitterid *string `json:"twitterid"`
  330. Youtubeid *string `json:"youtubeid"`
  331. }
  332. type JsonLineSt struct {
  333. Caucus *string `json:"caucus"`
  334. CongressNumbers []int `json:"congress_numbers"`
  335. Current bool `json:"current"`
  336. Description string `json:"description"`
  337. District *string `json:"district"`
  338. Enddate string `json:"enddate"`
  339. Extra Extra `json:"extra"`
  340. LeadershipTitle *string `json:"leadership_title"`
  341. Party string `json:"party"`
  342. Person Person `json:"person"`
  343. Phone string `json:"phone"`
  344. RoleType string `json:"role_type"`
  345. RoleTypeLabel string `json:"role_type_label"`
  346. SenatorClass string `json:"senator_class"`
  347. SenatorClassLabel string `json:"senator_class_label"`
  348. SenatorRank string `json:"senator_rank"`
  349. SenatorRankLabel string `json:"senator_rank_label"`
  350. Startdate string `json:"startdate"`
  351. State string `json:"state"`
  352. Title string `json:"title"`
  353. TitleLong string `json:"title_long"`
  354. Website string `json:"website"`
  355. }
  356. type Metast struct {
  357. limit int
  358. Offset int
  359. TotalCount int
  360. }
  361. type JsonSt struct {
  362. Meta Metast
  363. Objects []JsonLineSt `json:"objects"`
  364. }
  365. func readJsonDocument(fileName string) (string, error){
  366. var out string
  367. var data JsonSt
  368. file, err := os.Open(fileName)
  369. if err != nil {
  370. return "",err
  371. }
  372. decoder := json.NewDecoder(file)
  373. err = decoder.Decode(&data)
  374. for _, v := range data.Objects {
  375. if v.Party == "Democrat"{
  376. lint, err := json.Marshal(v)
  377. if err != nil {
  378. return "",err
  379. }
  380. lints := strings.Replace(string(lint), "\\u0026", "&", -1)
  381. out += lints + ","
  382. }
  383. }
  384. return out, err
  385. }
  386. func readJsonLinesLike(fileName string) (string, error){
  387. var out string
  388. var data JsonSt
  389. file, err := os.Open(fileName)
  390. if err != nil {
  391. return "",err
  392. }
  393. decoder := json.NewDecoder(file)
  394. err = decoder.Decode(&data)
  395. reg := regexp.MustCompile("^1959.*")
  396. for _, v := range data.Objects {
  397. reB := reg.FindAllString(v.Person.Birthday, -1)
  398. if len(reB) > 0 {
  399. lints := "{\"firstname\":\"" + v.Person.Firstname + "\",\"lastname\":\"" + v.Person.Lastname + "\"}"
  400. out += lints + ","
  401. }
  402. }
  403. return out, err
  404. }
  405. func readJsonLinesRange(fileName string, l, r int) (string, error){
  406. var out string
  407. var data JsonSt
  408. var i int
  409. file, err := os.Open(fileName)
  410. if err != nil {
  411. return "",err
  412. }
  413. decoder := json.NewDecoder(file)
  414. err = decoder.Decode(&data)
  415. for _, v := range data.Objects {
  416. if i < l {
  417. continue
  418. }
  419. if i >= r {
  420. break
  421. }
  422. extrb, err := json.Marshal(v.Extra)
  423. if err != nil {
  424. return "",err
  425. }
  426. extr := strings.Replace(string(extrb), "\\u0026", "&", -1)
  427. lints := "{\"firstname\":\"" + v.Person.Firstname + "\",\"lastname\":\"" + v.Person.Lastname +
  428. "\",\"extra\":" + extr + "}"
  429. out += lints + ","
  430. i++
  431. }
  432. return out, err
  433. }
  434. func readJsonFloatAggregation(fileName string) (float64, float64, float64, error){
  435. var avg, max, min, sum float64
  436. var data JsonSt
  437. var i int
  438. file, err := os.Open(fileName)
  439. if err != nil {
  440. return avg, max, min, err
  441. }
  442. decoder := json.NewDecoder(file)
  443. err = decoder.Decode(&data)
  444. for _, v := range data.Objects {
  445. if i == 0 {
  446. min = float64(v.Person.Cspanid)
  447. }
  448. if max < float64(v.Person.Cspanid) {
  449. max = float64(v.Person.Cspanid)
  450. }
  451. if min > float64(v.Person.Cspanid) {
  452. min = float64(v.Person.Cspanid)
  453. }
  454. sum += float64(v.Person.Cspanid)
  455. i++
  456. }
  457. avg = sum / float64(i)
  458. return avg, max, min, err
  459. }
  460. func readJsonDocumentConcat(fileName string) (string, error){
  461. var out string
  462. var data JsonSt
  463. file, err := os.Open(fileName)
  464. if err != nil {
  465. return "",err
  466. }
  467. decoder := json.NewDecoder(file)
  468. err = decoder.Decode(&data)
  469. for _, v := range data.Objects {
  470. if v.Person.Firstname + v.Person.Lastname == "JohnKennedy" {
  471. extrb, err := json.Marshal(v.Person)
  472. if err != nil {
  473. return "",err
  474. }
  475. extr := "{\"person\":" + strings.Replace(string(extrb), "\\u0026", "&", -1) + "}"
  476. out += extr + ","
  477. }
  478. }
  479. return out, err
  480. }
  481. func readJsonComplicateConcat(fileName string) (string, error){
  482. var out string
  483. var data JsonSt
  484. file, err := os.Open(fileName)
  485. if err != nil {
  486. return "",err
  487. }
  488. decoder := json.NewDecoder(file)
  489. err = decoder.Decode(&data)
  490. for _, v := range data.Objects {
  491. if v.Startdate > "2017-01-01" && v.SenatorRank == "junior" ||
  492. v.State == "CA" && v.Party == "Repulican" {
  493. cn := "["
  494. for _,vv := range v.CongressNumbers {
  495. cn += strconv.Itoa(vv) + ","
  496. }
  497. cn = cn[:len(cn)-1] + "]"
  498. lints := "{\"firstname\":\"" + v.Person.Firstname + "\",\"lastname\":\"" + v.Person.Lastname + "\",\"congress_numbers\":" + cn + "}"
  499. out += lints + ","
  500. }
  501. }
  502. return out, err
  503. }