pip install rtree
Run to view results
import matplotlib.pyplot as plt
import numpy as np
import os
import rtree
import random
Run to view results
# Base-10 exponents of the dataset sizes to benchmark (N = 10 ** p).
potencias = list(range(2, 7))
Run to view results
RTree
import time
import pandas as pd
# Benchmark: time a single K-nearest-neighbour query on a disk-backed R-tree
# for every combination of dataset size (10 ** p for p in potencias) and
# dimensionality. Results end up in resultsRT (rows: N, columns: D).
dimensiones = [2, 4, 8, 16, 32]
tiempos = {str(D): [] for D in dimensiones}
K = 8
indexes = []
for i in potencias:
    N = pow(10, i)
    indexes.append(str(N))
    for D in dimensiones:
        # Remove the files left by the previous run so each index starts empty.
        for ruta in ("puntos.data", "puntos.index"):
            if os.path.exists(ruta):
                os.remove(ruta)
        prop = rtree.index.Property()
        prop.dimension = D
        # NOTE(review): the original comment tagged this value as "M". In the
        # rtree API node fan-out is configured through leaf_capacity /
        # index_capacity, so confirm buffering_capacity is the intended knob.
        prop.buffering_capacity = 10
        prop.dat_extension = "data"
        prop.idx_extension = "index"
        # Generate N random D-dimensional points.
        data = np.random.sample((N, D))
        # Insert the points, using their row position as the item id.
        ind = rtree.index.Index("puntos", properties=prop)
        try:
            for j, punto in enumerate(data):
                ind.insert(j, punto)
            query = data[0]
            # perf_counter is the monotonic, high-resolution clock intended
            # for measuring short intervals; time.time() can jump and has a
            # coarser resolution on some platforms.
            start_time = time.perf_counter()
            # Materialise the result so reading back the ids is also timed.
            list(ind.nearest(query, num_results=K))
            tiempos[str(D)].append(time.perf_counter() - start_time)
        finally:
            # Always release the index files, even if insert/query fails.
            ind.close()
resultsRT = pd.DataFrame.from_dict(tiempos)
resultsRT.index = indexes
resultsRT
Run to view results
Lineal Scan
import time
import pandas as pd
import numpy as np
def ED(Q, Ci):
    """Return the norm of ``Q - Ci`` (the Euclidean distance for 1-D vectors)."""
    diferencia = Q - Ci
    return np.linalg.norm(diferencia)
def KnnSearch(Q, k, C):
    """Return the ``k`` rows of ``C`` closest to ``Q`` by Euclidean distance.

    This is still an exhaustive (linear) scan: the distance to every row is
    computed. The distances are evaluated in one vectorised NumPy pass
    instead of a Python-level ``iterrows()`` loop, which builds a Series per
    row and dominates the running time on large frames.

    Args:
        Q: Query vector. Only its first ``C.shape[1]`` components are used.
        k: Number of neighbours to keep; applied as a slice bound.
        C: pandas DataFrame holding one candidate point per row.

    Returns:
        A list of ``(index_label, distance)`` tuples sorted by ascending
        distance. Rows at equal distance keep their order in ``C``.
    """
    puntos = C.to_numpy()
    # Truncate the query to the row width, as the row-by-row version did.
    consulta = np.asarray(Q[:puntos.shape[1]])
    distancias = np.linalg.norm(puntos - consulta, axis=1)
    # A stable sort keeps ties in row order, matching list.sort on distance.
    orden = np.argsort(distancias, kind="stable")[:k]
    etiquetas = list(C.index)
    return [(etiquetas[pos], distancias[pos]) for pos in orden]
# Benchmark: time a single exhaustive K-nearest-neighbour query (KnnSearch)
# for every combination of dataset size (10 ** p for p in potencias) and
# dimensionality. Results end up in resultsKNN (rows: N, columns: D).
# The linear scan works purely in memory, so no index files are touched here.
dimensiones = [2, 4, 8, 16, 32]
tiempos = {str(D): [] for D in dimensiones}
K = 8
indexes = []
for i in potencias:
    N = pow(10, i)
    indexes.append(str(N))
    for D in dimensiones:
        # Generate N random D-dimensional points, labelled "0", "1", ...
        data = pd.DataFrame(np.random.sample((N, D)))
        data.index = data.index.astype(str)
        query = data.iloc[0].values
        # perf_counter is the monotonic, high-resolution clock intended for
        # measuring short intervals.
        start_time = time.perf_counter()
        KnnSearch(query, K, data)
        tiempos[str(D)].append(time.perf_counter() - start_time)
resultsKNN = pd.DataFrame.from_dict(tiempos)
resultsKNN.index = indexes
resultsKNN
Run to view results
Crear DF's
def _comparar(columna):
    """Pair the linear-scan and R-tree timings stored under ``columna``.

    Returns the resultsKNN column, the resultsRT column, and a DataFrame
    that places them side by side as 'LinealScan' and 'RTree'.
    """
    knn = resultsKNN[columna]
    rt = resultsRT[columna]
    return knn, rt, pd.DataFrame({'LinealScan': knn, 'RTree': rt})


# One comparison table per benchmarked dimension.
knn_col2, rt1_col2, df_D2 = _comparar('2')
print(df_D2)
knn_col4, rt1_col4, df_D4 = _comparar('4')
print(df_D4)
knn_col8, rt1_col8, df_D8 = _comparar('8')
print(df_D8)
knn_col16, rt1_col16, df_D16 = _comparar('16')
print(df_D16)
knn_col32, rt1_col32, df_D32 = _comparar('32')
print(df_D32)
Run to view results
Graficarlos
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.cm as cm
# Draw one 3-D bar chart per dimension comparing linear-scan and R-tree
# query times across dataset sizes.
dataframes = [df_D2, df_D4, df_D8, df_D16, df_D32]
titles = ["D=2", "D=4", "D=8", "D=16", "D=32"]
for titulo, df_combined in zip(titles, dataframes):
    fig = plt.figure()
    ax1 = fig.add_subplot(111, projection='3d')
    n_filas = df_combined.shape[0]
    # x selects the method (0 = first column, 1 = second column), y the row.
    xpos, ypos = np.meshgrid([0, 1], np.arange(n_filas))
    xpos = xpos.flatten()
    ypos = ypos.flatten()
    zpos = np.zeros_like(xpos)
    dx = dy = 0.5
    # Row-major flatten matches the (row, method) order of xpos/ypos.
    dz = df_combined.values.flatten()
    unique_x = np.unique(xpos)
    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap
    # accepts the same (name, lut) arguments and is still available.
    color_map = plt.get_cmap('viridis', len(unique_x))
    ax1.bar3d(xpos, ypos, zpos, dx, dy, dz, shade=True, color=color_map(xpos / xpos.max()))
    ax1.set_xticks([0.25, 1.25])
    ax1.set_xticklabels(["Lineal Scan", "RTree"])
    # Take ticks and labels from the frame itself so they always match the
    # rows actually plotted.
    ax1.set_yticks(np.arange(n_filas) + 0.25)
    ax1.set_yticklabels(list(df_combined.index))
    plt.title(f"Gráfico comparativo con {titulo}")
    plt.show()
Run to view results