【复杂网络】读取gml或mat文件,生成幂律分布

neunms    2021-03-21 14:26

'''
    Author: 聂铭硕
    Purpose: read a gml or mat file and plot the power-law (degree) distribution of the graph
'''
from sklearn import linear_model
from scipy import optimize
import networkx as nx
import numpy as np
import scipy.io as scio
import matplotlib.pyplot as plt
import os
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

def read_from_mat(dataset):
    """Load a graph from the adjacency matrix stored in a MATLAB ``.mat`` file.

    The file is looked up as ``data/<dataset>`` and the adjacency matrix is
    read from the variable named ``net``.

    Args:
        dataset: File name of the ``.mat`` file inside the ``data`` directory.

    Returns:
        The graph built by ``nx.from_numpy_array`` from the adjacency matrix.

    Raises:
        KeyError: If the file contains no variable called ``net``.
    """
    # NOTE: '__file__' is a quoted literal, not the module attribute, so
    # realpath() resolves it against the current working directory; the
    # data directory is therefore looked up relative to the working directory.
    file_dir = os.path.dirname(os.path.realpath('__file__'))
    data_dir = os.path.join(file_dir, 'data/{}'.format(dataset))
    net = scio.loadmat(data_dir)['net']
    # Sparse matrices expose toarray(); a densely stored 'net' is used as is.
    # A plain ndarray is required because nx.from_numpy_matrix (and the
    # np.matrix type it consumed) was removed in networkx 3.0.
    adj = net.toarray() if hasattr(net, 'toarray') else np.asarray(net)
    return nx.from_numpy_array(adj)

def read_form_gml(dataset):
    """Load a graph from a GML file located at ``data/<dataset>``.

    Args:
        dataset: File name of the ``.gml`` file inside the ``data`` directory.

    Returns:
        The graph returned by ``nx.read_gml``, with nodes named after the
        GML ``label`` attribute.
    """
    # NOTE: '__file__' is a quoted literal, not the module attribute, so
    # realpath() resolves it against the current working directory.
    base_dir = os.path.dirname(os.path.realpath('__file__'))
    gml_path = os.path.join(base_dir, 'data/{}'.format(dataset))
    return nx.read_gml(gml_path, label='label')

def power(G):
    """Scatter-plot the degree distribution P(k) of ``G``.

    For every degree ``k`` that occurs in the graph, the fraction of nodes
    with that degree is plotted against ``k``. The figure is saved as
    ``度分布.svg`` in the current working directory and then shown.

    Args:
        G: Graph accepted by ``nx.degree_histogram``.

    Returns:
        None.
    """
    # Compute the histogram and its total once; index k holds the number of
    # nodes with degree k.
    histogram = nx.degree_histogram(G)
    node_count = sum(histogram)

    # Keep only the degrees that actually occur, mapped to their fraction.
    distribution = {
        degree: float(count) / node_count
        for degree, count in enumerate(histogram)
        if count != 0
    }

    fig = plt.figure()
    fig.add_subplot(111)
    plt.xlabel('度k')
    plt.ylabel("度为k的节点所占的比例P(k)")
    # Pass plain lists rather than dict views to scatter.
    plt.scatter(list(distribution.keys()), list(distribution.values()),
                c="black", s=10)
    plt.savefig('度分布.svg', format='svg', dpi=500)
    plt.show()

def log_power(G):
    """Fit and plot the degree distribution of ``G`` on log-log axes.

    The points ``(log k, log P(k))`` are computed for every degree ``k >= 1``
    that occurs in the graph, a straight line is fitted to them with
    ``linear_model.LinearRegression``, and the points and the fitted line
    are plotted. The fitted coefficient, intercept, predictions and inputs
    are printed. The figure is saved as ``log度分布.svg`` in the current
    working directory and then shown.

    Args:
        G: Graph accepted by ``nx.degree_histogram``.

    Returns:
        None.

    Raises:
        ValueError: If the graph has no node with degree >= 1, so there is
            nothing to fit.
    """
    # Compute the histogram and its total once; index k holds the number of
    # nodes with degree k.
    histogram = nx.degree_histogram(G)
    node_count = sum(histogram)

    # Degree 0 (isolated nodes) is skipped: log(0) is -inf, which the
    # regression cannot handle. Degrees that do not occur are skipped too,
    # because log(P(k)) would be -inf as well.
    points = [
        (degree, float(count) / node_count)
        for degree, count in enumerate(histogram)
        if degree > 0 and count != 0
    ]
    if not points:
        raise ValueError('graph has no node with degree >= 1; nothing to fit')

    # The regression expects 2-D inputs: one single-feature row per point.
    X_parameter = [[float(np.log(degree))] for degree, _ in points]
    Y_parameter = [[float(np.log(fraction))] for _, fraction in points]

    # Fit log(P(k)) as a linear function of log(k).
    regr = linear_model.LinearRegression()
    regr.fit(X_parameter, Y_parameter)
    print('Coefficients: \n', regr.coef_, )
    print("Intercept:\n", regr.intercept_)

    # Predict once and reuse the result for both the printout and the plot.
    y = regr.predict(X_parameter)
    print(y)
    print(X_parameter)

    plt.scatter(X_parameter, Y_parameter, color='black')
    plt.plot(X_parameter, y, color='blue', linewidth=3)

    plt.xlabel('log(k)')
    plt.ylabel('log(P(k))')
    plt.savefig('log度分布.svg', format='svg', dpi=500)
    plt.show()

if __name__ == '__main__':
    # Dataset setting: file name inside the data directory.
    dataset = 'arxiv.mat'
    print('Read the Dataset :\t' + dataset)
    # Choose the loader from the file extension. splitext is used instead of
    # split('.')[1] so that names containing several dots are classified by
    # their last suffix and names without a dot do not raise IndexError.
    if os.path.splitext(dataset)[1].lower() == '.gml':
        G = read_form_gml(dataset)
    else:
        G = read_from_mat(dataset)
    print('number of nodes', nx.number_of_nodes(G))
    power(G)
    log_power(G)
 
Last Modified: 2021-03-21 14:26
Views: 1.2K

[[total]] comments

Post your comment
  1. [[item.time]]
    [[item.user.username]] [[item.floor]]Floor
  2. Click to load more...
  3. Post your comment