'''
作者:聂铭硕
功能:读取gml或mat文件,生成图的幂律分布
'''
from sklearn import linear_model
from scipy import optimize
import networkx as nx
import numpy as np
import scipy.io as scio
import matplotlib.pyplot as plt
import os
# Global matplotlib settings: select the SimHei font so the Chinese axis
# labels used below can be rendered, and turn off the Unicode minus glyph so
# negative tick labels use a plain ASCII hyphen.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
def read_from_mat(dataset):
    """Load a graph from a MATLAB ``.mat`` file stored under ``data/``.

    The file must contain a variable named ``net`` holding the adjacency
    matrix, either as a SciPy sparse matrix or as a dense array.

    Args:
        dataset: File name (for example ``'arxiv.mat'``) inside the ``data``
            directory.

    Returns:
        The networkx graph built from the adjacency matrix.
    """
    # NOTE: '__file__' is a string literal here, not the module attribute, so
    # the path is resolved against the current working directory.
    file_dir = os.path.dirname(os.path.realpath('__file__'))
    data_dir = os.path.join(file_dir, 'data/{}'.format(dataset))
    data = scio.loadmat(data_dir)
    net = data['net']
    # Sparse matrices expose toarray(); a dense matrix is converted as-is.
    # Either way we end up with a plain ndarray rather than np.matrix.
    adj = net.toarray() if hasattr(net, 'toarray') else np.asarray(net)
    # from_numpy_matrix was removed in networkx 3.0; from_numpy_array is the
    # supported replacement and accepts the ndarray built above.
    return nx.from_numpy_array(adj)
def read_form_gml(dataset):
    """Load a graph from a GML file stored under ``data/``.

    Args:
        dataset: File name of the GML file inside the ``data`` directory.

    Returns:
        The graph parsed by ``networkx.read_gml``, with nodes named after
        their ``label`` attribute.
    """
    # '__file__' is a quoted literal, so the base directory is the current
    # working directory.
    base_dir = os.path.dirname(os.path.realpath('__file__'))
    gml_path = os.path.join(base_dir, 'data/{}'.format(dataset))
    return nx.read_gml(gml_path, label='label')
def power(G):
    """Plot the degree distribution P(k) of ``G`` as a scatter chart.

    For every degree ``k`` that occurs in the graph, the fraction of nodes
    having that degree is plotted against ``k``. The figure is written to
    ``度分布.svg`` and then shown with ``plt.show()``.

    Args:
        G: The networkx graph to analyse.
    """
    # Compute the histogram and its total once instead of once per degree.
    histogram = nx.degree_histogram(G)
    node_count = sum(histogram)
    # Map degree k -> fraction of nodes with degree k, skipping empty bins.
    distribution = {
        degree: float(count) / node_count
        for degree, count in enumerate(histogram)
        if count != 0
    }

    fig = plt.figure()
    fig.add_subplot(111)
    plt.xlabel('度k')
    plt.ylabel("度为k的节点所占的比例P(k)")
    # Pass real lists rather than dict views to the plotting call.
    plt.scatter(
        list(distribution.keys()),
        list(distribution.values()),
        c="black",
        s=10,
    )
    plt.savefig('度分布.svg', format='svg', dpi=500)
    plt.show()
def log_power(G):
    """Fit and plot the degree distribution of ``G`` on log-log axes.

    The fraction of nodes P(k) is computed for every occurring degree
    ``k >= 1``, a straight line is fitted to ``log(P(k))`` against ``log(k)``
    with ordinary least squares, and both the points and the fitted line are
    plotted. The fitted coefficient and intercept, the predictions and the
    log-degree inputs are printed. The figure is written to ``log度分布.svg``
    and then shown with ``plt.show()``.

    Args:
        G: The networkx graph to analyse.

    Raises:
        ValueError: If no node has a positive degree, so there is nothing
            to fit.
    """
    # Compute the histogram and its total once instead of once per degree.
    histogram = nx.degree_histogram(G)
    node_count = sum(histogram)
    # Degree 0 is excluded along with empty bins: log(0) is -inf, which makes
    # the regression fail on any graph containing isolated nodes. The
    # fractions are still taken over all nodes, so P(k) is unchanged.
    distribution = {
        degree: float(count) / node_count
        for degree, count in enumerate(histogram)
        if degree > 0 and count != 0
    }
    if not distribution:
        raise ValueError('graph has no nodes with positive degree to fit')

    log_k = np.log(list(distribution.keys()))
    log_p = np.log(list(distribution.values()))
    # The regressor expects 2-D input: one single-feature row per sample.
    X_parameter = [[float(value)] for value in log_k]
    Y_parameter = [[float(value)] for value in log_p]

    # Least-squares fit of log(P(k)) against log(k).
    regr = linear_model.LinearRegression()
    regr.fit(X_parameter, Y_parameter)

    print('Coefficients: \n', regr.coef_, )
    print("Intercept:\n", regr.intercept_)
    y = regr.predict(X_parameter)
    print(y)
    print(X_parameter)

    # Scatter the observed points and overlay the fitted line, reusing the
    # predictions computed above.
    plt.scatter(X_parameter, Y_parameter, color='black')
    plt.plot(X_parameter, y, color='blue', linewidth=3)
    plt.xlabel('log(k)')
    plt.ylabel('log(P(k))')
    plt.savefig('log度分布.svg', format='svg', dpi=500)
    plt.show()
if __name__ == '__main__':
    # Dataset file name, looked up inside the data/ directory by the loaders.
    dataset = 'arxiv.mat'
    print('Read the Dataset :\t' + dataset)
    # Choose the loader from the file extension. splitext only looks at the
    # last dot, so names with several dots pick the right loader and names
    # without any dot no longer raise IndexError.
    extension = os.path.splitext(dataset)[1].lower()
    if extension == '.gml':
        G = read_form_gml(dataset)
    else:
        G = read_from_mat(dataset)
    print('number of nodes', nx.number_of_nodes(G))
    # Plot the raw degree distribution, then the log-log fit.
    power(G)
    log_power(G)