library(patentsview)
library(dplyr)
library(visNetwork)
library(magrittr)
library(stringr)
library(knitr)
library(tidyverse)
library(readxl)
# Build the PatentsView query for Tencent's patents: the assignee organization
# must contain "tencent" and the patent type must contain "utility".
# The query helpers are referenced through `qry_funs` explicitly so that
# `contains` cannot be confused with the tidyselect helper of the same name.
tencent_query <- qry_funs$and(
  qry_funs$contains(assignee_organization = "tencent"),
  qry_funs$contains(patent_type = "utility")
)
# Run the Tencent query against the "patents" endpoint, retrieving every page
# of results. Besides the patent's own number and title, the request asks for
# the patents it cites, the patents citing it, its assignee organization and
# its count of citing US patents.
tencent_utility <- search_pv(
  query = tencent_query,
  endpoint = "patents",
  fields = c(
    "patent_number",
    "patent_title",
    "cited_patent_number",
    "citedby_patent_number",
    "assignee_organization",
    "patent_num_cited_by_us_patents"
  ),
  all_pages = TRUE
)
# Flatten the nested API response into a list of data frames keyed by
# patent_number.
tencent_lst <- unnest_pv_data(tencent_utility$data, pk = "patent_number")

# Forward citations: the distinct, non-missing numbers of the patents that
# cite Tencent's patent portfolio.
tencent_citedby <- na.omit(
  unique(tencent_lst$citedby_patents$citedby_patent_number)
)
# Fetch the forward-citing patents (the patents that cite Tencent's portfolio)
# and build one node row per patent: the `patents` columns plus `group`, the
# assignee organization.
#
# The patent numbers are sent in batches of 100 because, per the original
# note, PatentsView limits the size of an API call. The batches are derived
# from the actual length of `tencent_citedby`, so every citing patent is
# requested and no NA is sent, however many citing patents the API currently
# reports.
citedby_batch_size <- 100L
citedby_ids <- as.character(tencent_citedby)
citedby_batches <- split(
  citedby_ids,
  ceiling(seq_along(citedby_ids) / citedby_batch_size)
)

# Query one batch of patent numbers and return its node rows.
#
# patent_numbers: character vector of patent numbers to look up.
# Returns a data frame with the unnested `patents` columns and a `group`
# column holding the assignee organization of each patent.
fetch_citedby_nodes <- function(patent_numbers) {
  response <- search_pv(
    query = qry_funs$contains(patent_number = patent_numbers),
    endpoint = "patents",
    fields = c(
      "patent_number", "patent_title", "cited_patent_number",
      "citedby_patent_number", "assignee_organization"
    ),
    all_pages = TRUE
  )
  tables <- unnest_pv_data(response$data, pk = "patent_number")
  patents <- tables$patents
  assignees <- tables$assignees

  # Attach the assignee by patent number rather than by row position: a
  # patent with several assignees has several rows in `assignees`, which
  # would otherwise shift every following patent onto the wrong company.
  # `match()` picks the first listed assignee of each patent and yields NA
  # for a patent without any assignee row.
  first_assignee <- match(patents$patent_number, assignees$patent_number)
  cbind(
    patents,
    group = assignees$assignee_organization[first_assignee],
    stringsAsFactors = FALSE
  )
}

# Merge the batches and drop rows that are exact duplicates.
# `unname()` keeps the batch labels from leaking into the row names.
citedby_df <- unique(
  do.call(rbind, lapply(unname(citedby_batches), fetch_citedby_nodes))
)
# Build the node rows for Tencent's own patents, keeping only the patents
# that have been cited at least once.
tencent_patents <- tencent_lst$patents
tencent_assignees <- tencent_lst$assignees

# Attach the assignee by patent number instead of by row position, so a
# patent with co-assignees (several rows in `assignees`) neither breaks the
# column bind nor shifts the labels. The first listed assignee is used.
tencent_utility_df <- cbind(
  tencent_patents,
  group = tencent_assignees$assignee_organization[
    match(tencent_patents$patent_number, tencent_assignees$patent_number)
  ],
  stringsAsFactors = FALSE
)

# Remove patents with zero citations. A missing count is excluded as well;
# indexing with NA would otherwise insert all-NA rows.
cited_count <- tencent_utility_df$patent_num_cited_by_us_patents
tencent_utility_df <- tencent_utility_df[
  !is.na(cited_count) & cited_count != 0,
]

# Drop the citation count by name (not by column position) so the remaining
# columns line up with the forward-citation node table regardless of the
# order in which the API returns the fields.
tencent_utility_df <- tencent_utility_df[
  ,
  setdiff(names(tencent_utility_df), "patent_num_cited_by_us_patents"),
  drop = FALSE
]
# Nodes of the citation network: the forward-citing patents stacked on top of
# Tencent's own cited patents, with exact duplicate rows removed.
merge_df <- unique(rbind(citedby_df, tencent_utility_df))
# Visualization of the forward citation network

# Build the HTML shown for a node: the patent title, wrapped by `str_wrap()`
# with its line breaks turned into <br> tags, rendered as a link to the
# patent's Google Patents page.
#
# title:  patent title(s).
# number: patent number(s), appended to "US" in the link target.
pat_title <- function(title, number) {
  html_title <- gsub("\n", "<br>", str_wrap(title), fixed = TRUE)
  paste0(
    '<a href="https://patents.google.com/patent/US', number, '">',
    html_title,
    '</a>'
  )
}
# Edges of the network: one row per (Tencent patent, citing patent) pair,
# with incomplete pairs removed. The first column becomes `from` and the
# second becomes `to`.
edges <- na.omit(tencent_lst$citedby_patents)
colnames(edges) <- c("from", "to")
# Node table for visNetwork: the patent number serves as both the node id and
# its label, and `title` holds the linked patent title built by pat_title().
nodes <- mutate(
  merge_df,
  id = patent_number,
  label = patent_number,
  title = pat_title(patent_title, patent_number)
)
# Render the interactive citation network.
visNetwork(
  nodes = nodes,
  edges = edges,
  height = "1280px",
  width = "100%",
  main = "Which is Tencent's core patent(s)?"
) %>%
  # Colour setting for the named assignee group
  visGroups(
    groupname = "Tencent Technology (Shenzhen) Company",
    color = "blue"
  ) %>%
  # Legend of groups on the right-hand side
  visLegend(
    width = 0.1,
    position = "right",
    main = "Group",
    zoom = TRUE
  ) %>%
  # Draw an arrow head at the `to` end of every edge
  visEdges(arrows = list(to = list(enabled = TRUE))) %>%
  # Allow selecting by group or node id, and highlight a node's neighbours
  visOptions(
    selectedBy = "group",
    highlightNearest = TRUE,
    nodesIdSelection = TRUE
  ) %>%
  # Compute the layout with igraph
  visIgraphLayout()
The assumption is that the importance of a patent is positively correlated with its number of forward citations (how many times the patent is cited by other patents). Tencent’s patents are colored in light blue
Based on the assumption above, the core patent should lie in the biggest cluster
From the figure above, it can be concluded that the core patent is US7512407B2, “Instant messaging system and method”, which is the most-cited patent in Tencent’s patent portfolio
To verify this conclusion, we turn to claim 1 (the claim with the broadest scope of protection) of this patent
The claim is about a method of instant communication on a wireless communication device
Remember Tencent’s 2 most successful products?