<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>tidymodels | Data Science in Agriculture</title><link>https://luanppott.netlify.app/tag/tidymodels/</link><atom:link href="https://luanppott.netlify.app/tag/tidymodels/index.xml" rel="self" type="application/rss+xml"/><description>tidymodels</description><generator>Wowchemy (https://wowchemy.com)</generator><language>en-us</language><lastBuildDate>Thu, 23 Sep 2021 00:00:00 +0000</lastBuildDate><image><url>https://luanppott.netlify.app/media/icon_hu64dbc3ef6cc8eee8a271968fd359f750_313029_512x512_fill_lanczos_center_2.png</url><title>tidymodels</title><link>https://luanppott.netlify.app/tag/tidymodels/</link></image><item><title>R - Data visualization, linear regression and logistic regression</title><link>https://luanppott.netlify.app/post/post3/</link><pubDate>Thu, 23 Sep 2021 00:00:00 +0000</pubDate><guid>https://luanppott.netlify.app/post/post3/</guid><description>
&lt;script src="https://luanppott.netlify.app/post/post3/index.en_files/header-attrs/header-attrs.js">&lt;/script>
&lt;div id="data-visualization-linear-regression-and-logistic-regression" class="section level1">
&lt;h1>Data visualization, linear regression and logistic regression&lt;/h1>
&lt;div id="crop-data-collection---ground-truth-and-remote-sensing" class="section level2">
&lt;h2>Crop data collection - ground truth and remote sensing&lt;/h2>
&lt;/div>
&lt;div id="loading-the-packages" class="section level2">
&lt;h2>Loading the packages&lt;/h2>
&lt;pre class="r">&lt;code>library(tidyverse)
library(tidymodels)
library(sf)
library(geobr)&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="url-link-of-data-from-data-collection-and-remote-sensing-on-github-httpsgithub.comluanpott10class" class="section level2">
&lt;h2>Link to the ground-truth and remote sensing data on GitHub: &lt;a href="https://github.com/luanpott10/Class" class="uri">https://github.com/luanpott10/Class&lt;/a>&lt;/h2>
&lt;/div>
&lt;div id="you-will-need-the-raw-file-httpsraw.githubusercontent.comluanpott10classmaindata_crops.csv" class="section level2">
&lt;h2>You will need the raw file &lt;a href="https://raw.githubusercontent.com/luanpott10/Class/main/data_crops.csv" class="uri">https://raw.githubusercontent.com/luanpott10/Class/main/data_crops.csv&lt;/a>&lt;/h2>
&lt;/div>
&lt;div id="loading-the-data" class="section level2">
&lt;h2>Loading the data&lt;/h2>
&lt;pre class="r">&lt;code>data &amp;lt;- read.csv(&amp;quot;https://raw.githubusercontent.com/luanpott10/Class/main/data_crops.csv&amp;quot;)&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="plot-the-data-in-the-map" class="section level2">
&lt;h2>Plot the data on the map&lt;/h2>
&lt;pre class="r">&lt;code>cities_RS &amp;lt;- read_municipality(code_muni = &amp;quot;RS&amp;quot;, year= 2020)
ggplot()+
geom_sf(data=cities_RS)+
geom_point(data=data,aes(x=longitude,y=latitude,fill=class),shape=22,size=2)+
labs(x= &amp;quot;Longitude&amp;quot;, y = &amp;quot;Latitude&amp;quot;)+
scale_fill_manual(values = c(&amp;quot;#eded0c&amp;quot;, &amp;quot;#49a345&amp;quot;))+
theme(legend.position = c(0.17, 0.2),
panel.border = element_rect(color=&amp;quot;Black&amp;quot;, fill = NA),
panel.background = element_rect(fill = &amp;quot;#f2f2f2&amp;quot;),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 22.0),
axis.text.x = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 18.0),
axis.title.y = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 22.0),
axis.text.y = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 18.0),
legend.title = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;#39;#000000&amp;#39;,size = 12.0),
legend.text = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;#39;#000000&amp;#39;,size = 12.0),
legend.background = element_rect(fill=&amp;quot;#f2f2f2&amp;quot;,
linetype=&amp;quot;dashed&amp;quot;,
colour =&amp;quot;#f2f2f2&amp;quot;))&lt;/code>&lt;/pre>
&lt;p>&lt;img src="https://luanppott.netlify.app/post/post3/index.en_files/figure-html/unnamed-chunk-3-1.png" width="480" />&lt;/p>
&lt;/div>
&lt;div id="plot-the-data-variables---continuous-x-continuous" class="section level2">
&lt;h2>Plot the data variables - continuous x continuous&lt;/h2>
&lt;pre class="r">&lt;code>ggplot(data=data,aes(x=b3_GCVI,y=b4_GCVI))+
geom_point(aes(fill=class),shape=22,size=2)+
scale_fill_manual(values = c(&amp;quot;#eded0c&amp;quot;, &amp;quot;#49a345&amp;quot;))+
stat_smooth(formula = y~x, method=&amp;quot;lm&amp;quot;, se=FALSE,color=&amp;quot;black&amp;quot;, linetype=&amp;#39;dashed&amp;#39;)+
theme(panel.border = element_rect(color=&amp;quot;Black&amp;quot;, fill = NA),
panel.background = element_rect(fill = &amp;quot;#f2f2f2&amp;quot;),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 22.0),
axis.text.x = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 18.0),
axis.title.y = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 22.0),
axis.text.y = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 18.0),
legend.title = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;#39;#000000&amp;#39;,size = 12.0),
legend.text = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;#39;#000000&amp;#39;,size = 12.0),
legend.background = element_rect(fill=&amp;quot;#f2f2f2&amp;quot;,
linetype=&amp;quot;dashed&amp;quot;,
colour =&amp;quot;#f2f2f2&amp;quot;))&lt;/code>&lt;/pre>
&lt;p>&lt;img src="https://luanppott.netlify.app/post/post3/index.en_files/figure-html/unnamed-chunk-4-1.png" width="672" />&lt;/p>
&lt;/div>
&lt;div id="linear-regression" class="section level2">
&lt;h2>Linear regression&lt;/h2>
&lt;pre class="r">&lt;code>lm_fit_x &amp;lt;- lm(b3_GCVI ~ b4_GCVI, data = data)
summary(lm_fit_x)
##
## Call:
## lm(formula = b3_GCVI ~ b4_GCVI, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7924 -0.9983 -0.0018 0.9525 3.9586
##
## Coefficients:
## Estimate Std. Error t value Pr(&amp;gt;|t|)
## (Intercept) -0.73701 0.15933 -4.626 1.14e-05 ***
## b4_GCVI -0.49793 0.04352 -11.440 &amp;lt; 2e-16 ***
## ---
## Signif. codes: 0 &amp;#39;***&amp;#39; 0.001 &amp;#39;**&amp;#39; 0.01 &amp;#39;*&amp;#39; 0.05 &amp;#39;.&amp;#39; 0.1 &amp;#39; &amp;#39; 1
##
## Residual standard error: 1.475 on 98 degrees of freedom
## Multiple R-squared: 0.5718, Adjusted R-squared: 0.5675
## F-statistic: 130.9 on 1 and 98 DF, p-value: &amp;lt; 2.2e-16&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="linear-regression-by-tidymodels-workflow" class="section level2">
&lt;h2>Linear regression by tidymodels workflow&lt;/h2>
&lt;div id="creating-a-parsnip-specification-for-a-linear-regression-model" class="section level4">
&lt;h4>Creating a parsnip specification for a linear regression model&lt;/h4>
&lt;pre class="r">&lt;code>lm_model &amp;lt;- linear_reg() |&amp;gt;
set_engine(&amp;#39;lm&amp;#39;) |&amp;gt;
set_mode(&amp;#39;regression&amp;#39;)&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="fitting-the-model-supplying-a-formula-expression-and-the-data" class="section level4">
&lt;h4>Fitting the model supplying a formula expression and the data&lt;/h4>
&lt;pre class="r">&lt;code>lm_fit &amp;lt;- lm_model %&amp;gt;%
fit(b3_GCVI ~ b4_GCVI, data = data)&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="summary-of-the-model" class="section level4">
&lt;h4>Summary of the model&lt;/h4>
&lt;pre class="r">&lt;code>lm_fit |&amp;gt;
pluck(&amp;quot;fit&amp;quot;) |&amp;gt;
summary()
##
## Call:
## stats::lm(formula = b3_GCVI ~ b4_GCVI, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7924 -0.9983 -0.0018 0.9525 3.9586
##
## Coefficients:
## Estimate Std. Error t value Pr(&amp;gt;|t|)
## (Intercept) -0.73701 0.15933 -4.626 1.14e-05 ***
## b4_GCVI -0.49793 0.04352 -11.440 &amp;lt; 2e-16 ***
## ---
## Signif. codes: 0 &amp;#39;***&amp;#39; 0.001 &amp;#39;**&amp;#39; 0.01 &amp;#39;*&amp;#39; 0.05 &amp;#39;.&amp;#39; 0.1 &amp;#39; &amp;#39; 1
##
## Residual standard error: 1.475 on 98 degrees of freedom
## Multiple R-squared: 0.5718, Adjusted R-squared: 0.5675
## F-statistic: 130.9 on 1 and 98 DF, p-value: &amp;lt; 2.2e-16
# You can also use
lm_fit$fit |&amp;gt; summary()
##
## Call:
## stats::lm(formula = b3_GCVI ~ b4_GCVI, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7924 -0.9983 -0.0018 0.9525 3.9586
##
## Coefficients:
## Estimate Std. Error t value Pr(&amp;gt;|t|)
## (Intercept) -0.73701 0.15933 -4.626 1.14e-05 ***
## b4_GCVI -0.49793 0.04352 -11.440 &amp;lt; 2e-16 ***
## ---
## Signif. codes: 0 &amp;#39;***&amp;#39; 0.001 &amp;#39;**&amp;#39; 0.01 &amp;#39;*&amp;#39; 0.05 &amp;#39;.&amp;#39; 0.1 &amp;#39; &amp;#39; 1
##
## Residual standard error: 1.475 on 98 degrees of freedom
## Multiple R-squared: 0.5718, Adjusted R-squared: 0.5675
## F-statistic: 130.9 on 1 and 98 DF, p-value: &amp;lt; 2.2e-16&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="parameter-estimates-of-a-the-lm-object" class="section level4">
&lt;h4>Parameter estimates of the lm object&lt;/h4>
&lt;pre class="r">&lt;code>tidy(lm_fit)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## &amp;lt;chr&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt;
## 1 (Intercept) -0.737 0.159 -4.63 1.14e- 5
## 2 b4_GCVI -0.498 0.0435 -11.4 9.39e-20&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="extract-the-model-statistics" class="section level4">
&lt;h4>Extract the model statistics&lt;/h4>
&lt;pre class="r">&lt;code>glance(lm_fit)
## # A tibble: 1 x 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt;
## 1 0.572 0.567 1.47 131. 9.39e-20 1 -180. 365. 373.
## # ... with 3 more variables: deviance &amp;lt;dbl&amp;gt;, df.residual &amp;lt;int&amp;gt;, nobs &amp;lt;int&amp;gt;&lt;/code>&lt;/pre>
&lt;/div>
&lt;/div>
&lt;div id="plot-the-data-variables---categorical-x-continuous" class="section level2">
&lt;h2>Plot the data variables - categorical x continuous&lt;/h2>
&lt;pre class="r">&lt;code>data &amp;lt;- data |&amp;gt; mutate(target = case_when(class == &amp;quot;corn&amp;quot; ~ 0,
class == &amp;quot;soybean&amp;quot; ~ 1))
ggplot(data=data, aes(y=target,x=b4_GCVI))+
geom_point(aes(fill=as.factor(target)),shape=22,size=2) +
scale_fill_manual(values = c(&amp;quot;#eded0c&amp;quot;, &amp;quot;#49a345&amp;quot;))+
scale_y_continuous(breaks=c(0,1),
labels=c(&amp;quot;0&amp;quot;,&amp;quot;1&amp;quot;),
limits=c(0,1))+
stat_smooth(formula = y~x, method=&amp;quot;glm&amp;quot;, se=FALSE, method.args = list(family=binomial),
color=&amp;quot;black&amp;quot;, linetype=&amp;#39;dashed&amp;#39;)+
labs(x= &amp;quot;b4_GCVI&amp;quot;, y = &amp;quot;Class&amp;quot;,fill=&amp;quot;Class&amp;quot;)+
theme(legend.position = c(0.17, 0.2),
panel.border = element_rect(color=&amp;quot;Black&amp;quot;, fill = NA),
panel.background = element_rect(fill = &amp;quot;#f2f2f2&amp;quot;),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 22.0),
axis.text.x = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 18.0),
axis.title.y = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 22.0),
axis.text.y = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;quot;#000000&amp;quot;,size = 18.0),
legend.title = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;#39;#000000&amp;#39;,size = 12.0),
legend.text = element_text(family = &amp;quot;serif&amp;quot;,
colour = &amp;#39;#000000&amp;#39;,size = 12.0),
legend.background = element_rect(fill=&amp;quot;#f2f2f2&amp;quot;,
linetype=&amp;quot;dashed&amp;quot;,
colour =&amp;quot;#f2f2f2&amp;quot;))&lt;/code>&lt;/pre>
&lt;p>&lt;img src="https://luanppott.netlify.app/post/post3/index.en_files/figure-html/unnamed-chunk-11-1.png" width="480" />&lt;/p>
&lt;/div>
&lt;div id="logistic-regression" class="section level2">
&lt;h2>Logistic regression&lt;/h2>
&lt;pre class="r">&lt;code>lg_fit_x &amp;lt;- glm(as.factor(class) ~ b4_GCVI, family=&amp;quot;binomial&amp;quot;, data=data)
summary(lg_fit_x)
##
## Call:
## glm(formula = as.factor(class) ~ b4_GCVI, family = &amp;quot;binomial&amp;quot;,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.91207 -0.04298 0.01134 0.32227 1.86651
##
## Coefficients:
## Estimate Std. Error z value Pr(&amp;gt;|z|)
## (Intercept) -3.5734 1.1155 -3.203 0.00136 **
## b4_GCVI -1.5677 0.3778 -4.150 3.33e-05 ***
## ---
## Signif. codes: 0 &amp;#39;***&amp;#39; 0.001 &amp;#39;**&amp;#39; 0.01 &amp;#39;*&amp;#39; 0.05 &amp;#39;.&amp;#39; 0.1 &amp;#39; &amp;#39; 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 138.629 on 99 degrees of freedom
## Residual deviance: 42.379 on 98 degrees of freedom
## AIC: 46.379
##
## Number of Fisher Scoring iterations: 7&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="logistic-regression-by-tidymodels-workflow" class="section level2">
&lt;h2>Logistic regression by tidymodels workflow&lt;/h2>
&lt;div id="creating-a-parsnip-specification-for-a-logistic-regression-model" class="section level4">
&lt;h4>Creating a parsnip specification for a logistic regression model&lt;/h4>
&lt;pre class="r">&lt;code>lg_model &amp;lt;- logistic_reg() |&amp;gt;
set_engine(&amp;quot;glm&amp;quot;) |&amp;gt;
set_mode(&amp;quot;classification&amp;quot;)&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="fitting-the-model-supplying-a-formula-expression-and-the-data-1" class="section level4">
&lt;h4>Fitting the model supplying a formula expression and the data&lt;/h4>
&lt;pre class="r">&lt;code>lg_fit &amp;lt;- lg_model |&amp;gt;
fit(as.factor(class) ~ b4_GCVI, data = data)&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="summary-of-the-model-1" class="section level4">
&lt;h4>Summary of the model&lt;/h4>
&lt;pre class="r">&lt;code>lg_fit |&amp;gt;
pluck(&amp;quot;fit&amp;quot;) |&amp;gt;
summary()
##
## Call:
## stats::glm(formula = as.factor(class) ~ b4_GCVI, family = stats::binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.91207 -0.04298 0.01134 0.32227 1.86651
##
## Coefficients:
## Estimate Std. Error z value Pr(&amp;gt;|z|)
## (Intercept) -3.5734 1.1155 -3.203 0.00136 **
## b4_GCVI -1.5677 0.3778 -4.150 3.33e-05 ***
## ---
## Signif. codes: 0 &amp;#39;***&amp;#39; 0.001 &amp;#39;**&amp;#39; 0.01 &amp;#39;*&amp;#39; 0.05 &amp;#39;.&amp;#39; 0.1 &amp;#39; &amp;#39; 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 138.629 on 99 degrees of freedom
## Residual deviance: 42.379 on 98 degrees of freedom
## AIC: 46.379
##
## Number of Fisher Scoring iterations: 7
# You can also use
lg_fit$fit |&amp;gt; summary()
##
## Call:
## stats::glm(formula = as.factor(class) ~ b4_GCVI, family = stats::binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.91207 -0.04298 0.01134 0.32227 1.86651
##
## Coefficients:
## Estimate Std. Error z value Pr(&amp;gt;|z|)
## (Intercept) -3.5734 1.1155 -3.203 0.00136 **
## b4_GCVI -1.5677 0.3778 -4.150 3.33e-05 ***
## ---
## Signif. codes: 0 &amp;#39;***&amp;#39; 0.001 &amp;#39;**&amp;#39; 0.01 &amp;#39;*&amp;#39; 0.05 &amp;#39;.&amp;#39; 0.1 &amp;#39; &amp;#39; 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 138.629 on 99 degrees of freedom
## Residual deviance: 42.379 on 98 degrees of freedom
## AIC: 46.379
##
## Number of Fisher Scoring iterations: 7&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="parameter-estimates-of-a-the-lm-object-1" class="section level4">
&lt;h4>Parameter estimates of the glm object&lt;/h4>
&lt;pre class="r">&lt;code>tidy(lg_fit)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## &amp;lt;chr&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt;
## 1 (Intercept) -3.57 1.12 -3.20 0.00136
## 2 b4_GCVI -1.57 0.378 -4.15 0.0000333&lt;/code>&lt;/pre>
&lt;/div>
&lt;div id="extract-the-model-statistics-1" class="section level4">
&lt;h4>Extract the model statistics&lt;/h4>
&lt;pre class="r">&lt;code>glance(lg_fit)
## # A tibble: 1 x 8
## null.deviance df.null logLik AIC BIC deviance df.residual nobs
## &amp;lt;dbl&amp;gt; &amp;lt;int&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;dbl&amp;gt; &amp;lt;int&amp;gt; &amp;lt;int&amp;gt;
## 1 139. 99 -21.2 46.4 51.6 42.4 98 100&lt;/code>&lt;/pre>
&lt;/div>
&lt;/div>
&lt;/div></description></item></channel></rss>