I’ve been trying to scrape the first column from the table of this webpage using google apps script. When I observe network activity in dev tools, I could notice that I have to send post http requests with json parameters to get required response from that site.
I’ve already found success using a python script that I wrote earlier. I tried to mimic the logic that I used in python within the following script but no luck. I always end up getting 403 status code.
function parseOpensea() {
var data = {"id":"EventHistoryQuery","query":"query EventHistoryQuery(n $archetype: ArchetypeInputTypen $bundle: BundleSlugn $collections: [CollectionSlug!]n $categories: [CollectionSlug!]n $chains: [ChainScalar!]n $eventTypes: [EventType!]n $cursor: Stringn $count: Int = 16n $showAll: Boolean = falsen $identity: IdentityInputTypen) {n ...EventHistory_data_L1XK6n}nnfragment AccountLink_data on AccountType {n addressn confign isCompromisedn user {n publicUsernamen idn }n ...ProfileImage_datan ...wallet_accountKeyn ...accounts_urln}nnfragment AssetCell_asset on AssetType {n collection {n namen idn }n namen ...AssetMedia_assetn ...asset_urln}nnfragment AssetCell_assetBundle on AssetBundleType {n assetQuantities(first: 2) {n edges {n node {n asset {n collection {n namen idn }n namen ...AssetMedia_assetn ...asset_urln idn }n relayIdn idn }n }n }n namen slugn}nnfragment AssetMedia_asset on AssetType {n animationUrln backgroundColorn collection {n displayData {n cardDisplayStylen }n idn }n isDelistedn imageUrln displayImageUrln}nnfragment AssetQuantity_data on AssetQuantityType {n asset {n ...Price_datan idn }n quantityn}nnfragment CollectionLink_assetContract on AssetContractType {n addressn blockExplorerLinkn}nnfragment CollectionLink_collection on CollectionType {n namen ...collection_urln ...verification_datan}nnfragment EventHistory_data_L1XK6 on Query {n assetEvents(after: $cursor, bundle: $bundle, archetype: $archetype, first: $count, categories: $categories, collections: $collections, chains: $chains, eventTypes: $eventTypes, identity: $identity, includeHidden: true) {n edges {n node {n assetBundle @include(if: $showAll) {n relayIdn ...AssetCell_assetBundlen ...bundle_urln idn }n assetQuantity {n asset @include(if: $showAll) {n relayIdn assetContract {n ...CollectionLink_assetContractn idn }n ...AssetCell_assetn ...asset_urln collection {n ...CollectionLink_collectionn idn }n idn }n ...quantity_datan idn }n relayIdn eventTimestampn eventTypen offerExpiredn customEventNamen ...utilsAssetEventLabeln devFee {n asset {n assetContract {n chainn idn }n idn }n quantityn ...AssetQuantity_datan idn }n devFeePaymentEvent {n ...EventTimestamp_datan idn }n fromAccount {n addressn ...AccountLink_datan idn }n price {n quantityn quantityInEthn ...AssetQuantity_datan idn }n endingPrice {n quantityn ...AssetQuantity_datan idn }n seller {n ...AccountLink_datan idn }n toAccount {n ...AccountLink_datan idn }n winnerAccount {n ...AccountLink_datan idn }n ...EventTimestamp_datan idn __typenamen }n cursorn }n pageInfo {n endCursorn hasNextPagen }n }n}nnfragment EventTimestamp_data on AssetEventType {n eventTimestampn transaction {n blockExplorerLinkn idn }n}nnfragment Price_data on AssetType {n decimalsn imageUrln symboln usdSpotPricen assetContract {n blockExplorerLinkn chainn idn }n}nnfragment ProfileImage_data on AccountType {n imageUrln addressn}nnfragment accounts_url on AccountType {n addressn user {n publicUsernamen idn }n}nnfragment asset_url on AssetType {n assetContract {n addressn chainn idn }n tokenIdn}nnfragment bundle_url on AssetBundleType {n slugn}nnfragment collection_url on CollectionType {n slugn}nnfragment quantity_data on AssetQuantityType {n asset {n decimalsn idn }n quantityn}nnfragment utilsAssetEventLabel on AssetEventType {n isMintn eventTypen}nnfragment verification_data on CollectionType {n isMintablen isSafelistedn isVerifiedn}nnfragment wallet_accountKey on AccountType {n addressn}n","variables":{"archetype":null,"bundle":null,"collections":["boredapeyachtclub"],"categories":null,"chains":null,"eventTypes":["AUCTION_SUCCESSFUL","OFFER_ENTERED"],"cursor":null,"count":16,"showAll":true,"identity":null}};
var options = {
"method" : "POST",
'payload': JSON.stringify({'query': data}),
'muteHttpExceptions': true,
"headers" : {
"accept": "*/*",
"content-type": "application/json",
"User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
"x-signed-query": "53ff23d0333e3997651740e18c7ea66da6f29fcc5e4d5aacd302574209e44e56",
}
};
var webURL = "https://api.opensea.io/graphql/";
var response = UrlFetchApp.fetch(webURL,options);
// var item = JSON.parse(response.getContentText());
console.log(response.getResponseCode());
// var pid = item.data.assetEvents;
// console.log(pid);
}
How to fetch the first coulmn from the table of this website issuing post requests with json parameters?
6
Ref
The HTTP 403 Forbidden response status code indicates that the server understands the request but refuses to authorize it. This status is similar to 401, but for the 403 Forbidden status code re-authenticating makes no difference. The access is permanently forbidden and tied to the application logic, such as insufficient rights to a resource.
It is considered that Google Apps Script cannot access to the URL. For example, if you run the script on your browser, how about access to the URL on a dialog of Google Spreadsheet?I’m willing to go for any technique that may help me read the content of that site @Tanaike.
Thank you for replying. By the way, where is
the first column from the table of this webpage
?If you click on the left arrow sign
<-
right next tofilter
, you will see the first column. The first column is the earlier column of the headeritem
. This is how that looks like.Thank you for replying. I could understand your expected values. When I could find the solution, I would like to post it.