pred_boot<-function(boot,bootMed,dados){
  # dados[colnames(boot)[-which(colnames(boot)=="(Intercept)")]]->dados
  rowSums(t(t(boot[,-which(colnames(boot)=="(Intercept)")]) * as.numeric(dados)))+boot[,which(colnames(boot)=="(Intercept)")]->pred
  as.numeric(HDInterval::hdi(pred))->int
  mean(pred,na.rm=T)->media
  sum(bootMed[-1] * as.numeric(dados))+bootMed[1]->pred_med
  
  return(tibble(low=int[1],up=int[2],int_media=media,boot_media=pred_med))
}

aval_model_nova_safra<-function(modelos,safra_aval,safra_obs,banco_bruto){
  # browser()
  modelo_aval<-(modelos %>% filter(SAFRA==safra_aval) %>% pull(monte_carlo))[[1]]
  modelo_obs<-(modelos %>% filter(SAFRA==safra_obs) %>% pull(monte_carlo))[[1]]
  
  predict(modelo_aval$reg_dados_comp,newdata=modelo_obs$dados_comp)->modelo_obs$dados_comp$pred
  
  modelo_obs$dados_comp %>% 
  ggplot(aes_string(x="y",y="pred"))+
    geom_point()+
    geom_abline(intercept = 0,slope=1)->g1
  
  modelo_aval$m3$t->boot_modelo
  summary(modelo_aval$m3)$bootMed->bootMed
  
  
  banco_bruto %>% 
    filter(SAFRA %in% c(safra_aval,safra_obs)) %>% 
    dplyr::select(SAFRA,`NOME DA FAZENDA`) %>% 
    group_by(`NOME DA FAZENDA`) %>% 
    count %>% 
    filter(n>1) %>% 
    pull(`NOME DA FAZENDA`)->fazendas_rep
  
  banco_bruto %>% 
    filter(SAFRA %in% c(safra_aval)) %>% 
    filter(`NOME DA FAZENDA` %in% fazendas_rep) %>% 
    dplyr::select(modelo_aval$independentes,`NOME DA FAZENDA`) %>% 
    .[complete.cases(.),]->banco_to_predict
  
  names(banco_to_predict)[-ncol(banco_to_predict)]<-sapply(names(banco_to_predict)[1:ncol(banco_to_predict)-1],rm_crter)
  banco_to_predict %>% 
    split(.$`NOME DA FAZENDA`) %>% 
    purrr::map(~pred_boot(boot = boot_modelo,bootMed=bootMed,dados = .))->resp
  
  do.call("rbind",lapply(names(resp),function(x){resp[[x]]$nome=x; return(resp[[x]])})) %>% 
    dplyr::select(nome,int_media,boot_media,low,up) %>% 
    left_join(
      # novobanco %>% dplyr::select_("nome",novomodelo$dependente)
      banco_bruto %>% 
        filter(SAFRA==safra_obs) %>% 
        dplyr::select(safra_atual=modelos[1,]$monte_carlo[[1]]$dependente,nome=`NOME DA FAZENDA`)
      ) %>% 
    left_join(
      # banco %>% dplyr::select_("nome",resp_1516=novomodelo$dependente)
      banco_bruto %>%
      filter(SAFRA==safra_aval) %>% 
        dplyr::select(safra_anterior=modelos[1,]$monte_carlo[[1]]$dependente,nome=`NOME DA FAZENDA`)
      )->aval_boot
  
  g2<-aval_boot %>% 
    ggplot(aes(x=reorder(nome,abs(up-low)),y=safra_atual))+
    geom_linerange(aes(x=reorder(nome,abs(up-low)),ymin=low,ymax=up),lwd=3,alpha=.6,col="grey")+
    geom_point()+
    geom_point(aes(x=reorder(nome,abs(up-low)),y=boot_media),shape=4,col="red")+
    geom_point(aes(x=reorder(nome,abs(up-low)),y=safra_anterior),shape=2,col="blue")+
    coord_flip()+
    theme_bw()+
    xlab("")+
    ylab(modelos[1,]$monte_carlo[[1]]$dependente)
  
  return(list(yx=g1,boot=g2))
  
}


gen_biplot<-function(pca,clusters){
  fviz_pca_biplot(pca,labelsize = 4,col.var = "black",
                  # alpha.var =   "contrib",
                  habillage = as.factor(clusters$data.clust$clust),
                  label = "var",addEllipses = T,ellipse.level = 0.68,title=" ")+
    theme_minimal()+
    xlab("")+ylab("")
}

gen_dudi.pca<-function(data){
  dudi.pca(data$call$X,scale=T,scannf=F,nf=30)
}

fit_PCA<-function(x){
  PCA(x[,-1],graph = F)
}

describe_cluster <- function(data,clusters) {

  clusters$data.clust %>% 
    describeBy(group = "clust",mat = T,digits = 3) %>% 
    rownames_to_column(var='Variavel') %>% 
    mutate(Variavel=gsub("\\d",replacement = "",x = Variavel)) %>% 
    filter(Variavel!='clust*')->desc_table
  
  clusters$data.clust %>% 
    rename(Cluster=clust) %>% 
    mutate(Cluster=as.factor(Cluster)) %>% 
    gather(key = 'key',value = 'valor',-Cluster) %>% 
    ggplot(aes(x=Cluster,y=valor,col=Cluster))+
    geom_boxplot()+
    facet_wrap(~key,scales='free')+
    ylab("")+xlab("")->g1
  
  fazendas<-data %>% 
    dplyr::select(nome) %>% 
    bind_cols(
      as_tibble(clusters$data.clust)
    ) %>% 
    arrange(clust)
  
  return(
    list(
      fazendas=fazendas,
      desc_table=desc_table,
      boxplot=g1
    )
  )
}

rm_crter<-function(x){gsub("[^A-z]","",tolower(x))}

qual_cluster<-function(data_cluster,amostra){
  data_cluster %>% 
    dplyr::select(valormdiodevenda,gmdglobalkg,lotaoglobaluaha,desembolsocabms,clust) %>% 
    .[complete.cases(.),] %>% 
    split(.$clust) %>% 
    purrr::map(~kmeans(x=.,center=1)) %>% 
    purrr::map("centers") %>% 
    purrr::map(~as.data.frame(.)) %>% 
    do.call("rbind",.)->cluster_kmean
  
  which.min(sqrt(rowSums(do.call("rbind",apply(cluster_kmean[,-5],1,"-",amostra)))^2))->resp
  return(as.numeric(resp))
}
