rhadoop linear regression 问题


library(rhdfs) library(rmr2) hdfs.init() hdfs.delete("/user/output/lm.output") map <- function(k,lines) { lines<-unlist(strsplit(lines,'#')) k<-lines[1] x<-unlist(strsplit(lines[2],',')) y<-unlist(strsplit(lines[3],',')) x<-as.numeric(x) y<-as.numeric(y) lm <- lm(y ~ x) return( keyval(k, lm$coefficients[[2]]) ) } reduce <- function(key, lmres) {   # keyval(key, lmres)   return lmres } wordcount <- function (input, output=NULL) {   mapreduce(input=input, output=output, input.format="text",output.format = "text", map=map, reduce=reduce) } hdfs.root <- '/user' hdfs.data <- file.path(hdfs.root, 'input/lm.input') hdfs.out <- file.path(hdfs.root, 'output/lm.output') out <- wordcount(hdfs.data, hdfs.out)
输入文件为:/user/input/lm.input 1#1,2,3,4#2,4,6,8 2#1,2,3,4#3,6,9,12 3#1,2,3,4#4,8,12,16 4#1,2,3,4#1.5,3,4.5,6
但是输出却只有2个结果: 1       2 4       1.5

相关内容

    暂无相关文章