pip install -r requirements.txt
As a data analyst, working with a data set involves several important steps to gain insights and make informed decisions. Some of the most crucial aspects are:
- Data Set Summary
- Data Distribution
- Data Visualization
- Model building
Python is a great tool for building applications easily, because many ready-made frameworks exist for the job. One of the most important for analysts is Streamlit.
I am developing this application for my own work and sharing it with you. Feel free to modify it to suit your requirements, or drop me an email with the changes you need and I will help you.
This version of the application has five main features, and you can plug in many more useful things as your project requires:
- Upload CSV
- Data view tab
- Data Summary
- Univariate and Bivariate Plots
- Simple and Multiple regression Models
It is good practice to create a folder and put the necessary files inside it. This project (EasyTask) has three files: 1. requirements.txt 2. app.py 3. plot.py.
Open the terminal, go to the project directory, and run the command `pip install -r requirements.txt`.
This downloads all the Python modules and packages needed to run this project. Before jumping into the code, it is better to first create a virtual environment and activate it.
Then copy and paste this code into the plot.py file:
import streamlit as st
import matplotlib.pyplot as plt
def univariateplot(plot_type, df, variable_name):
    """Render a single-variable plot of ``df[variable_name]`` in Streamlit.

    Args:
        plot_type: 'Line Plot', 'Bar Plot' or 'Scatter Plot'. Any other
            value writes a prompt instead of drawing a chart.
        df: Object indexable by ``variable_name`` that also exposes
            ``.index`` (the app passes the uploaded DataFrame).
        variable_name: Name of the column to plot.

    Bar and scatter plots use ``df.index`` for the x axis; the line plot
    lets matplotlib pick the x values from the data itself.
    """
    if plot_type not in ('Line Plot', 'Bar Plot', 'Scatter Plot'):
        st.write('Select a plot type.')
        return

    # Draw on an explicit figure and hand it to Streamlit rather than relying
    # on pyplot's implicit global figure, which st.pyplot() deprecates.
    fig, ax = plt.subplots()
    values = df[variable_name]
    if plot_type == 'Line Plot':
        ax.plot(values)
    elif plot_type == 'Bar Plot':
        ax.bar(df.index, values)
    else:
        ax.scatter(df.index, values)
    ax.set_title(plot_type)
    st.pyplot(fig)
    # Release the figure so repeated button clicks do not accumulate figures.
    plt.close(fig)
def bivariateplot(plot_type, df, variable1, variable2):
    """Render a two-variable plot of ``df[variable2]`` against ``df[variable1]``.

    Args:
        plot_type: 'Line Plot', 'Bar Plot' or 'Scatter Plot'. Any other
            value writes a prompt instead of drawing a chart.
        df: Object indexable by the two variable names (the app passes the
            uploaded DataFrame).
        variable1: Name of the column used for the x axis.
        variable2: Name of the column used for the y axis.
    """
    if plot_type not in ('Line Plot', 'Bar Plot', 'Scatter Plot'):
        st.write('Select a plot type.')
        return

    # Draw on an explicit figure and hand it to Streamlit rather than relying
    # on pyplot's implicit global figure, which st.pyplot() deprecates.
    fig, ax = plt.subplots()
    x_values = df[variable1]
    y_values = df[variable2]
    if plot_type == 'Line Plot':
        ax.plot(x_values, y_values)
    elif plot_type == 'Bar Plot':
        ax.bar(x_values, y_values)
    else:
        ax.scatter(x_values, y_values)
    ax.set_title(plot_type)
    st.pyplot(fig)
    # Release the figure so repeated button clicks do not accumulate figures.
    plt.close(fig)
Keeping the plot types in a separate .py file makes them easy to modify as requirements change; that file is then imported by our app.py file.
import numpy as np
import pandas as pd
import streamlit as st
import statsmodels.api as sm
import matplotlib.pyplot as plt
from plot import univariateplot, bivariateplot
def main():
    """Run the app: apply the Streamlit option, then draw input and output."""
    # Optional full-width layout; left disabled as in the original.
    # st.set_page_config(layout="wide")

    # Silences the warning Streamlit shows when st.pyplot() is called without
    # an explicit figure.
    # NOTE(review): newer Streamlit releases reportedly removed this option;
    # confirm against the version pinned in requirements.txt.
    st.set_option('deprecation.showPyplotGlobalUse', False)
    data_input()
    data_output()
def data_input():
    """Show the sidebar CSV uploader and store the parsed table.

    When a file is uploaded it is parsed with ``pd.read_csv`` and saved in
    ``st.session_state['uploaded_df']``. When no file is uploaded the session
    state is left untouched. A file that cannot be parsed is reported in the
    sidebar and the session state is likewise left untouched.
    """
    # Use Streamlit widgets in the sidebar to get user input
    uploaded_file = st.sidebar.file_uploader("Upload a CSV file", type=["csv"])
    if uploaded_file is None:
        return

    # The upload is user-supplied, so an empty or malformed file is expected
    # input: report it rather than letting the parser exception escape.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError) as exc:
        st.sidebar.error(f"Could not read the CSV file: {exc}")
        return

    st.session_state['uploaded_df'] = df
PLOT_TYPES = ['Line Plot', 'Bar Plot', 'Scatter Plot']


def data_output():
    """Render the main page.

    With an uploaded table in ``st.session_state['uploaded_df']`` this shows
    four tabs (DATA, SUMMARY, PLOTS, REGRESSION); otherwise it shows the
    landing page.
    """
    if 'uploaded_df' not in st.session_state:
        _render_landing_page()
        return

    df = st.session_state['uploaded_df']
    tab1, tab2, tab3, tab4 = st.tabs(
        ["DATA", "SUMMARY", "PLOTS", "REGRESSION"])
    with tab1:
        _render_data_tab(df)
    with tab2:
        _render_summary_tab(df)
    with tab3:
        _render_plots_tab(df)
    with tab4:
        _render_regression_tab(df)


def _render_data_tab(df):
    """Show the uploaded table as-is."""
    # Two empty writes act as vertical spacing under the tab bar.
    st.write("")
    st.write("")
    st.dataframe(df)


def _render_summary_tab(df):
    """Show the ``df.describe()`` summary statistics."""
    st.write("")
    st.write("")
    st.write(df.describe())


def _render_plots_tab(df):
    """Let the user choose a plot type and variable(s), then draw on demand."""
    variable_type = st.radio("Choose data type:",
                             ("Univariate", "Bivariate"))
    if variable_type == "Univariate":
        plot_type = st.selectbox("Select Plot type:", PLOT_TYPES)
        variable_name = st.selectbox("Select a variable:", df.columns)

        st.write("You selected:", plot_type, "based on", variable_name)
        if st.button('Create Plot'):
            univariateplot(plot_type, df, variable_name)
    elif variable_type == "Bivariate":
        plot_type = st.selectbox("Select Plot type:", PLOT_TYPES)
        variable_name1 = st.selectbox(
            "Select x axis variable:", df.columns)
        variable_name2 = st.selectbox(
            "Select y axis variable:", df.columns)
        st.write("You selected:", plot_type, "based on",
                 variable_name1, "and", variable_name2)
        if st.button('Create Plot'):
            bivariateplot(plot_type, df, variable_name1, variable_name2)


def _render_regression_tab(df):
    """Fit an OLS model of one chosen column on another and show its summary."""
    dependent_variable = st.selectbox(
        "Select dependent variable:", df.columns, key="dependent_variable")
    independent_variable = st.selectbox(
        "Select independent variable:", df.columns,
        key="independent_variable")

    if st.button("Create Model"):
        # Every column is offered in the select boxes, so the user may pick
        # data the model cannot use; report that instead of crashing.
        try:
            endog_data = df[dependent_variable]
            # add_constant appends the intercept column to the regressor.
            exog_data = sm.add_constant(df[independent_variable])
            res = sm.OLS(endog_data, exog_data).fit()
        except (ValueError, TypeError) as exc:
            st.error(f"Could not fit the regression model: {exc}")
            return
        st.write(res.summary())


def _render_landing_page():
    """Show the introduction displayed before any CSV is uploaded."""
    st.header("EasyTask")
    st.subheader(
        "Introducing our revolutionary project pre-stage idea – a seamless "
        "and user-friendly platform that transforms your CSV data into "
        "actionable insights with just a few clicks!")
    st.write(
        "Thank you for considering me for your development needs. I am here "
        "to assist you throughout the development process. Please don't "
        "hesitate to reach out if you have any questions or require any "
        "modifications. Your satisfaction is my priority, and I am more than "
        "happy to help. Feel free to contact me anytime, and together, we "
        "can create something remarkable.")
    st.write("email id: maitysougata724@gmail.com")
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
For data analysts and data scientists, it is good practice to build this type of intermediate project: it gives an idea of how the functionality runs and how all the components connect.
Plots
Regression Model
All the code is shared on my GitHub page; download it and run it. If you have any problems, please let me know. I am happy to help you.