Code
# load libraries -- make sure to activate your eds 217 environment first!
import pandas as pd
import matplotlib.pyplot as plt
# load libraries -- make sure to activate your eds 217 environment first!
import pandas as pd
import matplotlib.pyplot as plt
# load in your data
# The path is relative to the notebook's working directory.
df = pd.read_csv('../data/entry_survey_responses_2023.csv')
# List the column names (the full survey question texts) so they can be
# copied verbatim when selecting columns later.
print(df.columns)
Index(['In terms of programming in general, I consider myself to be',
'In terms of python in particular, I have',
'In terms of my confidence in my ability to use python programming language, I am:',
'In terms of my confidence with using python data science libraries, I am: ',
'In terms of my confidence with python computing tools such as conda, jupyter notebooks, and IDEs such as Visual Studio Code, I am:',
'I find that I learn coding best by [Studying independently]',
'I find that I learn coding best by [Working in small groups]',
'I find that I learn coding best by [Following along through examples ]',
'I find that I learn coding best by [Tinkering with code myself]',
'I find that I learn coding best by [Open-ended exercises]',
'I find that I learn coding best by [Focused practice sessions]'],
dtype='object')
I wanted to make some plots of the first two columns, so I asked ChatGPT the following:
I have a pandas dataframe with the following two columns with numeric 1-5 values. Make histograms using matplotlib to visualize this data: “‘In terms of programming in general, I consider myself to be’, ‘In terms of python in particular, I have’”
It gave me the code below:
# Draw two side-by-side histograms of the first two survey columns.
# Relies on `df` and `plt` defined in the earlier cells.
# NOTE: bins=5 splits the data range into five equal-width bins, so the bars
# are not centered on the integer skill levels.

# Create the first histogram
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.hist(df['In terms of programming in general, I consider myself to be'], bins=5, edgecolor='black')
plt.title('Programming in General')
plt.xlabel('Skill Level (1-5)')
plt.ylabel('Frequency')
plt.xticks(range(1, 6))

# Create the second histogram
plt.subplot(1, 2, 2)
plt.hist(df['In terms of python in particular, I have'], bins=5, edgecolor='black')
plt.title('Python in Particular')
plt.xlabel('Skill Level (1-5)')
plt.ylabel('Frequency')
plt.xticks(range(1, 6))

# Show the plots
plt.tight_layout()
plt.show()
I didn’t like that the skill level wasn’t centered around the integers so I followed up with ChatGPT:
“the skill level is an integer value not continuous”
It responded with the following code modifications:
# Create histograms with bars centered at integers
# Bin edges sit at the half-integers (0.5, 1.5, ..., 5.5), so each bar spans
# exactly one integer skill level and is centered on its tick.
# Relies on `df` and `plt` defined in the earlier cells.
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.hist(df['In terms of programming in general, I consider myself to be'], bins=[0.5, 1.5, 2.5, 3.5, 4.5, 5.5], edgecolor='black', alpha=0.7)
plt.title('Programming in General')
plt.xlabel('Skill Level (1-5)')
plt.ylabel('Frequency')
plt.xticks(range(1, 6))

plt.subplot(1, 2, 2)
plt.hist(df['In terms of python in particular, I have'], bins=[0.5, 1.5, 2.5, 3.5, 4.5, 5.5], edgecolor='black', alpha=0.7)
plt.title('Python in Particular')
plt.xlabel('Skill Level (1-5)')
plt.ylabel('Frequency')
plt.xticks(range(1, 6))

plt.tight_layout()
plt.show()
I liked these plots, but now I want to make similar graphs for the next three columns. Let’s see if we can adapt the code above to make that plot.
# Create histograms with bars centered at integers
# Same layout as the skill-level plots, adapted to the three confidence
# questions: three panels in one row, with half-integer bin edges from 0.5 to
# 10.5 so each bar is centered on an integer tick from 1 to 10.
# Relies on `df` and `plt` defined in the earlier cells.
plt.figure(figsize=(18, 6))

plt.subplot(1, 3, 1)
plt.hist(df['In terms of my confidence in my ability to use python programming language, I am:'], bins=[0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5], edgecolor='black', alpha=0.7)
plt.title('Python')
plt.xlabel('Confidence')
plt.ylabel('Frequency')
plt.xticks(range(1, 11))

plt.subplot(1, 3, 2)
# The trailing space in this column name is part of the CSV header.
plt.hist(df['In terms of my confidence with using python data science libraries, I am: '], bins=[0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5], edgecolor='black', alpha=0.7)
plt.title('Data Science Libraries')
plt.xlabel('Confidence')
plt.ylabel('Frequency')
plt.xticks(range(1, 11))

plt.subplot(1, 3, 3)
plt.hist(df['In terms of my confidence with python computing tools such as conda, jupyter notebooks, and IDEs such as Visual Studio Code, I am:'], bins=[0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5], edgecolor='black', alpha=0.7)
plt.title('Computing tools (conda, jupyter, VS Code)')
plt.xlabel('Confidence')
plt.ylabel('Frequency')
plt.xticks(range(1, 11))

plt.tight_layout()
plt.show()
Finally, I worked with ChatGPT to make a graph of the final questions on learning preferences. If you’re interested, you can see our full conversation here
# Build a stacked bar chart of the six "I find that I learn coding best by
# [...]" questions: one bar per learning method, one colored segment per
# response category. Relies on `df` and `plt` defined in the earlier cells.

# Keep only the last six columns (the learning-preference questions).
# NOTE: this overwrites `df`; reload the CSV if the other columns are needed.
df = df.iloc[:, -6:]

# Count the occurrences of each response for each method
counts = {}
for column in df.columns:
    counts[column] = df[column].value_counts()

# Define possible responses and initialize counts
responses = ['Strongly Disagree', 'Somewhat Disagree', 'Neither Agree or Disagree', 'Somewhat Agree', 'Strongly Agree']
for column in df.columns:
    for response in responses:
        # A response nobody chose is absent from value_counts(); add it with
        # a zero count so every column has all five categories.
        if response not in counts[column]:
            counts[column][response] = 0

# Sort the response counts for consistency
for column in df.columns:
    counts[column] = counts[column].loc[responses]

# Prepare data for stacked bar chart
short_labels = [s.split('[')[-1].rstrip(']') for s in counts.keys()]  # Shorten the labels
data = {}
for response in responses:
    data[response] = [counts[label].get(response, 0) for label in counts.keys()]

# Define gradient colors
gradient_colors = ['#FF0000', '#FF5500', '#FFAA00', '#FFFF00', '#7FFF00']

# Create the stacked bar chart
fig, ax = plt.subplots(figsize=(15, 7))

# `bottoms` tracks the running height of each bar so the next response
# category is stacked on top of the previous ones.
bottoms = [0] * len(short_labels)
for i, (response, values) in enumerate(data.items()):
    ax.bar(short_labels, values, label=response, bottom=bottoms, color=gradient_colors[i])
    bottoms = [i + j for i, j in zip(bottoms, values)]

# Add some text for labels, title, and axes ticks
ax.set_xlabel('Learning Method')
ax.set_ylabel('Frequency')
ax.set_title('Preferred Learning Methods for Coding')
ax.legend()

# Rotate x-axis labels to prevent overlap
# NOTE(review): calling set_xticklabels without fixing the tick positions first
# emits matplotlib's "FixedFormatter should only be used together with
# FixedLocator" UserWarning; calling
# ax.set_xticks(range(len(short_labels))) beforehand should avoid it -- confirm.
ax.set_xticklabels(short_labels, rotation=45)

# Show the plot
plt.show()
/var/folders/1f/_ptk0jz93h39qj25crwwtb0w0000gn/T/ipykernel_15649/2467521698.py:43: UserWarning: FixedFormatter should only be used together with FixedLocator
ax.set_xticklabels(short_labels, rotation=45)