Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions visualizing-python-plt-scatter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# How to Make a Scatter Plot in Python With plt.scatter()

This folder provides the code examples for the Real Python tutorial [How to Make a Scatter Plot in Python With plt.scatter()](https://realpython.com/visualizing-python-plt-scatter/).
2 changes: 2 additions & 0 deletions visualizing-python-plt-scatter/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
matplotlib==3.10.1
numpy==2.2.4
10 changes: 10 additions & 0 deletions visualizing-python-plt-scatter/step_01_basic_scatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Sections: Getting Started With plt.scatter() / Comparing plt.scatter() and plt.plot()

import matplotlib.pyplot as plt

# One entry per drink: its price and how many are sold per day.
price = [
    2.50,
    1.23,
    4.02,
    3.25,
    5.00,
    4.40,
]
sales_per_day = [
    34,
    62,
    49,
    22,
    13,
    19,
]

# Draw one marker per (price, sales_per_day) pair.
# The same picture can be produced with plt.plot(price, sales_per_day, "o").
plt.scatter(x=price, y=sales_per_day)
plt.show()
24 changes: 24 additions & 0 deletions visualizing-python-plt-scatter/step_02_timing_comparison.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Section: Comparing plt.scatter() and plt.plot()

import timeit
import matplotlib.pyplot as plt # noqa: F401

# Sample data handed to both plotting calls being timed.
price = [2.50, 1.23, 4.02, 3.25, 5.00, 4.40]
sales_per_day = [34, 62, 49, 22, 13, 19]

# (label to print, statement to time) pairs, run in this order.
benchmarks = (
    ("plt.scatter()", "plt.scatter(price, sales_per_day)"),
    ("plt.plot()", "plt.plot(price, sales_per_day, 'o')"),
)

for label, statement in benchmarks:
    # Each statement is executed 1000 times against this module's globals,
    # so it can see plt, price, and sales_per_day.
    elapsed = timeit.timeit(statement, number=1000, globals=globals())
    print(label, elapsed)
11 changes: 11 additions & 0 deletions visualizing-python-plt-scatter/step_03_size_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Section: Changing the Size

import matplotlib.pyplot as plt
import numpy as np

# One entry per drink; arrays so the margin can be scaled arithmetically.
price = np.array([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
sales_per_day = np.array([34, 62, 49, 22, 13, 19])
profit_margin = np.array([20, 35, 40, 20, 27.5, 15])

# Scale the margins by 10 before using them as marker sizes.
marker_sizes = profit_margin * 10

plt.scatter(
    x=price,
    y=sales_per_day,
    s=marker_sizes,
)
plt.show()
22 changes: 22 additions & 0 deletions visualizing-python-plt-scatter/step_04_size_and_color.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Section: Changing the Color

import matplotlib.pyplot as plt
import numpy as np

# One entry per drink.
price = np.array([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
sales_per_day = np.array([34, 62, 49, 22, 13, 19])
profit_margin = np.array([20, 35, 40, 20, 27.5, 15])

# RGB triples used to colour each marker by its sugar category.
low, medium, high = (0, 1, 0), (1, 1, 0), (1, 0, 0)
sugar_content = [low, high, medium, medium, high, low]

# Marker size comes from the profit margin, marker colour from sugar content.
marker_sizes = profit_margin * 10
plt.scatter(x=price, y=sales_per_day, s=marker_sizes, c=sugar_content)
plt.show()
32 changes: 32 additions & 0 deletions visualizing-python-plt-scatter/step_05_two_products.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Section: Changing the Shape

import matplotlib.pyplot as plt
import numpy as np

# RGB triples used to colour each marker by its sugar category.
low, medium, high = (0, 1, 0), (1, 1, 0), (1, 0, 0)

# Orange drinks: six products.
price_orange = np.asarray([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
sales_per_day_orange = np.asarray([34, 62, 49, 22, 13, 19])
profit_margin_orange = np.asarray([20, 35, 40, 20, 27.5, 15])
sugar_content_orange = [low, high, medium, medium, high, low]

# Cereal bars: four products.
price_cereal = np.asarray([1.50, 2.50, 1.15, 1.95])
sales_per_day_cereal = np.asarray([67, 34, 36, 12])
profit_margin_cereal = np.asarray([20, 42.5, 33.3, 18])
sugar_content_cereal = [low, high, medium, low]

# Both product groups are drawn with identical settings, orange drinks first.
product_groups = (
    (
        price_orange,
        sales_per_day_orange,
        profit_margin_orange,
        sugar_content_orange,
    ),
    (
        price_cereal,
        sales_per_day_cereal,
        profit_margin_cereal,
        sugar_content_cereal,
    ),
)
for prices, sales, margins, sugar_colors in product_groups:
    plt.scatter(x=prices, y=sales, s=margins * 10, c=sugar_colors)

plt.show()
33 changes: 33 additions & 0 deletions visualizing-python-plt-scatter/step_06_two_products_markers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Section: Changing the Shape

import matplotlib.pyplot as plt
import numpy as np

# RGB triples used to colour each marker by its sugar category.
low, medium, high = (0, 1, 0), (1, 1, 0), (1, 0, 0)

# Orange drinks: six products.
price_orange = np.asarray([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
sales_per_day_orange = np.asarray([34, 62, 49, 22, 13, 19])
profit_margin_orange = np.asarray([20, 35, 40, 20, 27.5, 15])
sugar_content_orange = [low, high, medium, medium, high, low]

# Cereal bars: four products.
price_cereal = np.asarray([1.50, 2.50, 1.15, 1.95])
sales_per_day_cereal = np.asarray([67, 34, 36, 12])
profit_margin_cereal = np.asarray([20, 42.5, 33.3, 18])
sugar_content_cereal = [low, high, medium, low]

# Keyword arguments for each scatter call. The orange drinks keep the
# default marker; the cereal bars are drawn with the "d" marker so the two
# groups can be told apart.
orange_scatter_kwargs = {
    "x": price_orange,
    "y": sales_per_day_orange,
    "s": profit_margin_orange * 10,
    "c": sugar_content_orange,
}
cereal_scatter_kwargs = {
    "x": price_cereal,
    "y": sales_per_day_cereal,
    "s": profit_margin_cereal * 10,
    "c": sugar_content_cereal,
    "marker": "d",
}

plt.scatter(**orange_scatter_kwargs)
plt.scatter(**cereal_scatter_kwargs)
plt.show()
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Section: Changing the Transparency

import matplotlib.pyplot as plt
import numpy as np

# RGB triples used to colour each marker by its sugar category.
low = (0, 1, 0)
medium = (1, 1, 0)
high = (1, 0, 0)

# Orange drinks: six products.
price_orange = np.asarray([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
sales_per_day_orange = np.asarray([34, 62, 49, 22, 13, 19])
profit_margin_orange = np.asarray([20, 35, 40, 20, 27.5, 15])
sugar_content_orange = [low, high, medium, medium, high, low]

# Cereal bars: four products.
price_cereal = np.asarray([1.50, 2.50, 1.15, 1.95])
sales_per_day_cereal = np.asarray([67, 34, 36, 12])
profit_margin_cereal = np.asarray([20, 42.5, 33.3, 18])
sugar_content_cereal = [low, high, medium, low]

# Each scatter carries its own legend label, so the legend entry stays
# attached to the right artist even if the calls are reordered or another
# artist is added. alpha=0.5 makes the markers semi-transparent.
plt.scatter(
    x=price_orange,
    y=sales_per_day_orange,
    s=profit_margin_orange * 10,
    c=sugar_content_orange,
    alpha=0.5,
    label="Orange Drinks",
)
plt.scatter(
    x=price_cereal,
    y=sales_per_day_cereal,
    s=profit_margin_cereal * 10,
    c=sugar_content_cereal,
    marker="d",
    alpha=0.5,
    label="Cereal Bars",
)

plt.title("Sales vs Prices for Orange Drinks and Cereal Bars")
# Builds the legend from the label= given to each scatter call above.
plt.legend()
plt.xlabel("Price (Currency Unit)")
# The plotted values are the sales_per_day_* arrays, so label them as daily.
plt.ylabel("Average sales per day")
# Annotation placed at data coordinates (3.2, 55) explaining the encodings.
plt.text(
    3.2,
    55,
    "Size of marker = profit margin\nColor of marker = sugar content",
)

plt.show()
45 changes: 45 additions & 0 deletions visualizing-python-plt-scatter/step_08_colormap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Section: Customizing the Colormap and Style of Your Scatter Plot

import matplotlib.pyplot as plt
import numpy as np

# Orange drinks: six products. Sugar content is now numeric so it can be
# mapped through a colormap.
price_orange = np.asarray([2.50, 1.23, 4.02, 3.25, 5.00, 4.40])
sales_per_day_orange = np.asarray([34, 62, 49, 22, 13, 19])
profit_margin_orange = np.asarray([20, 35, 40, 20, 27.5, 15])
sugar_content_orange = [15, 35, 22, 27, 38, 14]

# Cereal bars: four products.
price_cereal = np.asarray([1.50, 2.50, 1.15, 1.95])
sales_per_day_cereal = np.asarray([67, 34, 36, 12])
profit_margin_cereal = np.asarray([20, 42.5, 33.3, 18])
sugar_content_cereal = [21, 49, 29, 24]

# Use one colour scale for both products. Without explicit limits each
# scatter call scales its colours to its own data range, so equal sugar
# values would get different colours in the two groups and the colorbar
# (built from the most recent scatter) would only describe the cereal bars.
sugar_min = min(min(sugar_content_orange), min(sugar_content_cereal))
sugar_max = max(max(sugar_content_orange), max(sugar_content_cereal))

# Each scatter carries its own legend label, so the legend entry stays
# attached to the right artist regardless of call order.
plt.scatter(
    x=price_orange,
    y=sales_per_day_orange,
    s=profit_margin_orange * 10,
    c=sugar_content_orange,
    cmap="jet",
    vmin=sugar_min,
    vmax=sugar_max,
    alpha=0.5,
    label="Orange Drinks",
)
plt.scatter(
    x=price_cereal,
    y=sales_per_day_cereal,
    s=profit_margin_cereal * 10,
    c=sugar_content_cereal,
    cmap="jet",
    vmin=sugar_min,
    vmax=sugar_max,
    marker="d",
    alpha=0.5,
    label="Cereal Bars",
)

plt.title("Sales vs Prices for Orange Drinks and Cereal Bars")
# Builds the legend from the label= given to each scatter call above.
plt.legend()
plt.xlabel("Price (Currency Unit)")
# The plotted values are the sales_per_day_* arrays, so label them as daily.
plt.ylabel("Average sales per day")
# Annotation placed at data coordinates (2.7, 55) explaining the encodings.
plt.text(
    2.7,
    55,
    "Size of marker = profit margin\nColor of marker = sugar content",
)
# Both scatters share vmin/vmax, so this colorbar is valid for all markers.
plt.colorbar()

plt.show()
18 changes: 18 additions & 0 deletions visualizing-python-plt-scatter/step_09_bus_distribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Section: Further Scatter Plot Techniques With plt.scatter()

import matplotlib.pyplot as plt
import numpy as np

# Centre and spread of the two bumps that make up the arrival curve.
mean = (15, 45)
sd = (5, 7)


def _bump(minutes, center, width):
    """Return exp(-0.5 * ((minutes - center) / width) ** 2) elementwise."""
    return np.exp(-0.5 * ((minutes - center) / width) ** 2)


x = np.linspace(0, 59, 60)  # Represents each minute within the hour
first_distribution = _bump(x, mean[0], sd[0])
second_distribution = 0.9 * _bump(x, mean[1], sd[1])  # second bump scaled by 0.9

# Sum the two bumps and rescale so the largest value is 1.
y = first_distribution + second_distribution
y = y / y.max()

plt.plot(x, y)
plt.ylabel("Relative probability of bus arrivals")
plt.xlabel("Minutes past the hour")
plt.show()
15 changes: 15 additions & 0 deletions visualizing-python-plt-scatter/step_10_bus_random_scatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Section: Further Scatter Plot Techniques With plt.scatter()

import random
import matplotlib.pyplot as plt
import numpy as np

n_buses = 40

# Draw all the arrival minutes (integers 0-59) first, then all the
# likelihood values (floats in [0, 1)), and convert each list to an array.
sampled_minutes = [random.randint(0, 59) for _ in range(n_buses)]
sampled_likelihoods = [random.random() for _ in range(n_buses)]
bus_times = np.asarray(sampled_minutes)
bus_likelihood = np.asarray(sampled_likelihoods)

plt.scatter(
    x=bus_times,
    y=bus_likelihood,
)
plt.title("Randomly chosen bus arrival times and relative probabilities")
plt.ylabel("Relative probability of bus arrivals")
plt.xlabel("Minutes past the hour")
plt.show()
25 changes: 25 additions & 0 deletions visualizing-python-plt-scatter/step_11_bus_combined.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Section: Further Scatter Plot Techniques With plt.scatter()

import random
import matplotlib.pyplot as plt
import numpy as np

# Centre and spread of the two bumps that make up the arrival curve.
mean = (15, 45)
sd = (5, 7)

# One sample per minute of the hour (0 through 59).
x = np.linspace(0, 59, 60)

# Evaluate one bump per (mean, sd) pair; the second is scaled by 0.9.
bumps = [
    np.exp(-0.5 * ((x - center) / width) ** 2)
    for center, width in zip(mean, sd)
]
first_distribution = bumps[0]
second_distribution = 0.9 * bumps[1]

# Sum the two bumps and rescale so the largest value is 1.
y = first_distribution + second_distribution
y = y / y.max()

# Random points: integer minutes 0-59 paired with floats in [0, 1).
n_buses = 40
sampled_minutes = [random.randint(0, 59) for _ in range(n_buses)]
sampled_likelihoods = [random.random() for _ in range(n_buses)]
bus_times = np.asarray(sampled_minutes)
bus_likelihood = np.asarray(sampled_likelihoods)

# Random points first, then the curve drawn over the same axes.
plt.scatter(x=bus_times, y=bus_likelihood)
plt.plot(x, y)
plt.title("Randomly chosen bus arrival times and relative probabilities")
plt.ylabel("Relative probability of bus arrivals")
plt.xlabel("Minutes past the hour")
plt.show()
39 changes: 39 additions & 0 deletions visualizing-python-plt-scatter/step_12_bus_in_out_region.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Section: Further Scatter Plot Techniques With plt.scatter()

import random
import matplotlib.pyplot as plt
import numpy as np

# Centre and spread of the two bumps that make up the arrival curve.
mean = (15, 45)
sd = (5, 7)
first_mean, second_mean = mean
first_sd, second_sd = sd

# One sample per minute of the hour (0 through 59).
x = np.linspace(0, 59, 60)
first_distribution = np.exp(-0.5 * ((x - first_mean) / first_sd) ** 2)
second_distribution = 0.9 * np.exp(-0.5 * ((x - second_mean) / second_sd) ** 2)

# Sum the two bumps and rescale so the largest value is 1.
y = first_distribution + second_distribution
y = y / y.max()

# Random points: integer minutes 0-59 paired with floats in [0, 1).
n_buses = 40
bus_times = np.asarray([random.randint(0, 59) for _ in range(n_buses)])
bus_likelihood = np.asarray([random.random() for _ in range(n_buses)])

# bus_times holds integers 0-59, so it can index y directly to read the
# curve height at each sampled minute.
curve_at_bus_times = y[bus_times]
in_region = bus_likelihood < curve_at_bus_times
out_region = np.logical_not(in_region)  # i.e. on or above the curve

# Points below the curve: green default markers.
plt.scatter(
    x=bus_times[in_region],
    y=bus_likelihood[in_region],
    color="green",
)
# Points on or above the curve: red "x" markers.
plt.scatter(
    x=bus_times[out_region],
    y=bus_likelihood[out_region],
    color="red",
    marker="x",
)

plt.plot(x, y)
plt.title("Randomly chosen bus arrival times and relative probabilities")
plt.ylabel("Relative probability of bus arrivals")
plt.xlabel("Minutes past the hour")
plt.show()
Loading